twoafternoon.trtlive, twoafternoon.any.trtlive

reading DEG

twoafternoon.trtlive.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","twoafternoon.trtlive.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_short_description = col_character(),
##   perc_ID = col_double()
## )
twoafternoon.any.trtlive.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","twoafternoon.any.trtlive.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC.soil_trtSBC_OLD = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day03 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day04 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day06 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day08 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day10 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day13 = col_double(),
##   logFC.soil_trtSBC_OLD.sampling_day14 = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_short_description = col_character(),
##   perc_ID = col_double()
## )
diurnal34.time.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","dge.diurnal34.time.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC.sampling_time2_afternoon = col_double(),
##   logFC.sampling_time3_evening_5.30 = col_double(),
##   logFC.sampling_time4_night_1 = col_double(),
##   logFC.sampling_time5_night_2 = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_short_description = col_character(),
##   perc_ID = col_double()
## )
diurnal1314.time.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","dge.diurnal1314.time.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
##   genes = col_character(),
##   logFC.sampling_time2_afternoon = col_double(),
##   logFC.sampling_time3_evening_5.30 = col_double(),
##   logFC.sampling_time4_night_1 = col_double(),
##   logFC.sampling_time5_night_2 = col_double(),
##   logCPM = col_double(),
##   LR = col_double(),
##   PValue = col_double(),
##   FDR = col_double(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_short_description = col_character(),
##   perc_ID = col_double()
## )
# check
diurnal1314.time.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dim() # [1] 12080    13
## [1] 12080    13

format data

# select genes with higher CV 
## classic way
# Coefficient of variation (in percent) of every row of x: 100 * sd / mean.
# x is a matrix or data frame whose rows are genes and whose columns are numeric.
co.var.df <- function(x) {
  row.sd <- apply(x, 1, sd)
  row.mean <- rowMeans(x)
  100 * row.sd / row.mean
}
cpm.timecourse.v3.0$cv<-co.var.df(cpm.timecourse.v3.0[,-1])
# tidyverse way (not working)
#cpm.timecourse.v3.0 %>% slice(1:100) %>% select(-1) %>% group_by(%>% mutate(cv=map(.,co.var.df ))
a<-hist(cpm.timecourse.v3.0$cv)

a
## $breaks
##  [1]   0  50 100 150 200 250 300 350 400 450 500 550 600 650 700
## 
## $counts
##  [1] 19889  5976   989   270    87    45    24    12     7     4     2     1
## [13]     1     1
## 
## $density
##  [1] 1.456643e-02 4.376739e-03 7.243299e-04 1.977443e-04 6.371759e-05
##  [6] 3.295738e-05 1.757727e-05 8.788633e-06 5.126703e-06 2.929544e-06
## [11] 1.464772e-06 7.323861e-07 7.323861e-07 7.323861e-07
## 
## $mids
##  [1]  25  75 125 175 225 275 325 375 425 475 525 575 625 675
## 
## $xname
## [1] "cpm.timecourse.v3.0$cv"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# there are genes with extreme values
cpm.timecourse.v3.0 %>% filter(cv>600)
# Check expression pattern
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = cpm.timecourse.v3.0 %>% dplyr::filter(cv>450) %>% dplyr::slice(1:20)) ->p
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
p

ggsave(filename="../output/highCV.absvalue.genes.expression.png",width=11,height=8) # should I remove them????
# 
sum(as.integer(cpm.timecourse.v3.0$cv>30))/dim(cpm.timecourse.v3.0)[1] # [1] 0.5207265
## [1] 0.5207265
sum(as.integer(cpm.timecourse.v3.0$cv>40))/dim(cpm.timecourse.v3.0)[1] # [1] 0.3725282. Larger CV than SAS timecourse data ()??? Due to non log absolute expression value.
## [1] 0.3725282
# cf. sum(as.integer(SAS.expression.vst.s.kazu$cv>4.5))/dim(SAS.expression.vst.s.kazu)[1] #[1] 0.2300789

use log transformed data

cpm.timecourse.v3.0.log$cv<-co.var.df(cpm.timecourse.v3.0.log[,-1])
b<-hist(cpm.timecourse.v3.0.log$cv)

b
## $breaks
##  [1] -300000 -280000 -260000 -240000 -220000 -200000 -180000 -160000 -140000
## [10] -120000 -100000  -80000  -60000  -40000  -20000       0   20000   40000
## [19]   60000   80000  100000  120000  140000
## 
## $counts
##  [1]     1     0     0     1     1     0     0     0     1     0     1     1
## [13]     1     7  1648 25634     7     1     1     2     0     1
## 
## $density
##  [1] 1.830965e-09 0.000000e+00 0.000000e+00 1.830965e-09 1.830965e-09
##  [6] 0.000000e+00 0.000000e+00 0.000000e+00 1.830965e-09 0.000000e+00
## [11] 1.830965e-09 1.830965e-09 1.830965e-09 1.281676e-08 3.017431e-06
## [16] 4.693496e-05 1.281676e-08 1.830965e-09 1.830965e-09 3.661931e-09
## [21] 0.000000e+00 1.830965e-09
## 
## $mids
##  [1] -290000 -270000 -250000 -230000 -210000 -190000 -170000 -150000 -130000
## [10] -110000  -90000  -70000  -50000  -30000  -10000   10000   30000   50000
## [19]   70000   90000  110000  130000
## 
## $xname
## [1] "cpm.timecourse.v3.0.log$cv"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
# use largeCV
# Keep the log-scale rows of the genes whose CV on the non-log scale is > 40.
# Rows are selected by matching transcript_ID values: using the IDs themselves
# as a row index depends on the row names (or factor codes) of
# cpm.timecourse.v3.0.log and produces all-NA rows whenever they do not match.
largeCV.transcript.ID <- as.character(
  cpm.timecourse.v3.0$transcript_ID[which(cpm.timecourse.v3.0$cv > 40)]
)
cpm.timecourse.v3.0.log.largeCV <- cpm.timecourse.v3.0.log[
  cpm.timecourse.v3.0.log$transcript_ID %in% largeCV.transcript.ID,
]
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262   289  > [1] 10173   290  (02/01/2020) (cf. SAS.expression.vst.s.kazu.largeCV is 7025 288)
## [1] 10173   290
c<-hist(cpm.timecourse.v3.0.log.largeCV$cv)

c
## $breaks
##  [1] -30000 -25000 -20000 -15000 -10000  -5000      0   5000  10000  15000
## [11]  20000  25000  30000  35000
## 
## $counts
##  [1]    1    0    3    2    6  348 5692    6    4    1    1    0    1
## 
## $density
##  [1] 3.297609e-08 0.000000e+00 9.892828e-08 6.595218e-08 1.978566e-07
##  [6] 1.147568e-05 1.876999e-04 1.978566e-07 1.319044e-07 3.297609e-08
## [11] 3.297609e-08 0.000000e+00 3.297609e-08
## 
## $mids
##  [1] -27500 -22500 -17500 -12500  -7500  -2500   2500   7500  12500  17500
## [11]  22500  27500  32500
## 
## $xname
## [1] "cpm.timecourse.v3.0.log.largeCV$cv"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
###########
#save(cpm.timecourse.v3.0.log.largeCV,file=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
write_csv(cpm.timecourse.v3.0.log.largeCV,path=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))

WGCNA

co-expression analysis by WGCNA

# The following setting is important, do not omit.
library(WGCNA) # errors in installing WGCNA on my computer at impute package installation (Jan 27, 2020). Use Whitney
options(stringsAsFactors = FALSE)
if(Sys.info()["nodename"]=="whitney") {
  enableWGCNAThreads(10) # in Whitney (Maloof lab server) 
} else if (Sys.info()["nodename"]=="Kazu-MBP.plb.ucdavis.edu") {
      enableWGCNAThreads(2) # in my computer
  }

run this in Whitney

#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))
# for some reason, the library columns were read as character on Whitney. Needs to be fixed.
#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"),
#                                          col_types=list(col_character(),col_double())) # error
cpm.timecourse.v3.0.log.largeCV<-read.csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz")) # using classic read.csv in Whitney

#load(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
# 
 # Expression matrix for WGCNA: samples in rows, genes in columns.
 # Only sample columns are used: transcript_ID is the gene identifier and cv is
 # the per-gene summary column added during CV filtering, not an expression sample.
 sample.columns <- setdiff(names(cpm.timecourse.v3.0.log.largeCV), c("transcript_ID", "cv"))
 datExpr <- t(cpm.timecourse.v3.0.log.largeCV[, sample.columns, drop = FALSE])
  # Choose a set of soft-thresholding powers: 1-10, then even powers up to 20
  # (seq(from = 2, to = 20, by = 10) only contributed 2 and 12)
  powers <- c(1:10, seq(from = 12, to = 20, by = 2))
  sft <- pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
  # Plot the results:
  #sizeGrWindow(9, 5)
  pdf("../output/largeCV.softthresholding.pdf",width=10,height=8)
  par(mfrow = c(1,2));
  cex1 = 0.9;
  # Scale-free topology fit index as a function of the soft-thresholding power
  plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
       xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
       main = paste("Scale independence"));
  text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
       labels=powers,cex=cex1,col="red");
  # this line corresponds to using an R^2 cut-off of h
  abline(h=0.90,col="red")
  # Mean connectivity as a function of the soft-thresholding power
  plot(sft$fitIndices[,1], sft$fitIndices[,5],
       xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
       main = paste("Mean connectivity"))
  text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
  dev.off()
  # 
  net = blockwiseModules(datExpr, power = 9,
                         TOMType = "unsigned", minModuleSize = 20,
                         reassignThreshold = 0, mergeCutHeight = 0.25,
                         numericLabels = TRUE, pamRespectsDendro = FALSE,
                         saveTOMs = TRUE,
                         saveTOMFileBase = "cpm.timecourse.v3.0.log.largeCV.TOM",
                         verbose = 3)
  save(net,file="../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")  
  # open a graphics window
  pdf(file="../output/largeCV.dendrogram.pdf",width=10,height=8)
  # Convert labels to colors for plotting
  mergedColors = labels2colors(net$colors)
  # Plot the dendrogram and the module colors underneath
  plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
                      "Module colors",
                      dendroLabels = FALSE, hang = 0.03,
                      addGuide = TRUE, guideHang = 0.05)
  dev.off()
  # save parameters  
  moduleLabels = net$colors
  moduleColors = labels2colors(net$colors)
  MEs = net$MEs
  geneTree = net$dendrograms[[1]]
  save(MEs, moduleLabels, moduleColors, geneTree,file ="../output/all.largeCV.RData")

back to my computer to look at WGCNA results

cpm.timecourse.v3.0.log.largeCV<-read.csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz")) 
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262   289 -> [1] 10173   290 (Feb 01, 2020)
## [1] 10173   290
load("../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")  
load("../output/all.largeCV.RData")
# how many modules?
  table(net$colors);length(table(net$colors)) # 7 modules
## 
##    0    1    2    3    4    5    6 
## 4968 4723  174  126   79   72   31
## [1] 7

adding gene name, annotations

cpm.timecourse.v3.0.log.largeCV.modules <- tibble(
  transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,
  modules=moduleColors
)
#cpm.timecourse.v3.0.log.largeCV.modules.list<-list(transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,modules=moduleColors)
## prep
# annotation file for v3.0annotation
Br.v3.0.At.BLAST <- read_csv(file.path("..","Annotation_copy","output","v3.0annotation","Brapa_v3.0_annotated.csv")) 
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   name = col_character(),
##   chrom = col_character(),
##   subject = col_character(),
##   AGI = col_character(),
##   At_symbol = col_character(),
##   At_full_name = col_character(),
##   At_gene_model_type = col_character(),
##   At_short_description = col_character(),
##   At_Curator_summary = col_character(),
##   At_Computational_description = col_character()
## )
## See spec(...) for full column specifications.
# This annotation is redundant with name (Br gene). E.g.
Br.v3.0.At.BLAST %>% filter(name=="BraA01g040570.3C")
# reduce the redundancy (112418): keep only the highest-score row per Br gene (name)
# dplyr:: prefixes guard against masking (IRanges also exports desc and slice);
# ungroup() so the result does not stay grouped by name.
Br.v3.0anno.At.BLAST.highscore <- Br.v3.0.At.BLAST %>%
  dplyr::group_by(name) %>%
  dplyr::arrange(dplyr::desc(score)) %>%
  dplyr::slice(1) %>%
  dplyr::ungroup()
# function for adding annotation
## get object name https://stackoverflow.com/questions/14577412/how-to-convert-variable-object-name-into-string
# Return, as a string, the expression the caller supplied for v1
# (the argument is captured unevaluated).
myfunc <- function(v1) {
  captured <- substitute(v1)
  deparse(captured)
}
myfunc(foo)
## [1] "foo"
# Add annotation columns to a DGE table and write the result as
# ../output/<object name>.v3.0anno.csv.
# DGE: table with gene IDs as row names; they are moved to a "genes" column and
#      joined to the "name" column of Br.v3.0anno.At.BLAST.highscore.
# Returns the annotated table.
addAnno <- function(DGE) {
  # name of the object supplied by the caller; used for the output file name
  object.name <- deparse(substitute(DGE))
  with.genes <- DGE %>% rownames_to_column(var = "genes")
  temp <- left_join(with.genes, Br.v3.0anno.At.BLAST.highscore, by = c(genes = "name")) %>%
    dplyr::select(genes, names(DGE), AGI, At_symbol, At_short_description, perc_ID)
  print(object.name)
  output.file <- file.path("..", "output", paste0(object.name, ".v3.0anno.csv"))
  write_csv(temp, path = output.file)
  return(temp)
}

assign moduleColor to corresponding Br genes

  # Build a named list (gene ID -> module colour) holding every cDNA that has a
  # module assignment, in the order of names(Bra.v3.0_cdna).
  # One split() plus a lookup replaces filtering the module table once per cDNA
  # inside a loop (which also printed debug output for every gene).
  modules.by.gene <- split(
    as.character(cpm.timecourse.v3.0.log.largeCV.modules$modules),
    as.character(cpm.timecourse.v3.0.log.largeCV.modules$transcript_ID)
  )
  cdna.has.module <- names(Bra.v3.0_cdna) %in% names(modules.by.gene)
  # how many cDNAs have (TRUE) / do not have (FALSE) a module assignment
  print(table(cdna.has.module))
  Bra.v3.0_cdna.list <- modules.by.gene[names(Bra.v3.0_cdna)[cdna.has.module]]

# clean up Bra.v3.0_cdna.list
  table(sapply(Bra.v3.0_cdna.list,is.null))
  Bra.v3.0_cdna.list<-Bra.v3.0_cdna.list[!sapply(Bra.v3.0_cdna.list,is.null)]
  table(sapply(Bra.v3.0_cdna.list,is.null))
  
  save(Bra.v3.0_cdna.list,file="../output/Bra.v3.0_cdna.list.Rdata")
 ######### Did not work
# cpm.timecourse.v3.0.log.largeCV.modules %>% nest(transcript_ID) # this is not what I want
# library(purrr)
#cpm.timecourse.v3.0.log.largeCV.modules %>% purrr::transpose() 

ORA analysis of DEGs

# loading module info as custom categories compatible with goseq()
load("../output/Bra.v3.0_cdna.list.Rdata")
# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:glue':
## 
##     collapse, trim
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: XVector
## 
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## Loading required package: Rsamtools
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
## 
## Attaching package: 'GenomicAlignments'
## The following object is masked from 'package:dplyr':
## 
##     last
## 
## Attaching package: 'ShortRead'
## The following object is masked from 'package:dplyr':
## 
##     id
## The following object is masked from 'package:purrr':
## 
##     compose
## The following object is masked from 'package:tibble':
## 
##     view
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
## Attaching package: 'geneLenDataBase'
## The following object is masked from 'package:S4Vectors':
## 
##     unfactor
## 
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: XML
# for ggplot heatmap
## uncompress gz file
system(paste("gunzip -c ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.gz")," > ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")))
## read cDNA fasta file 
Bra.v3.0_cdna<-readDNAStringSet(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
##   A DNAStringSet instance of length 46250
##         width seq                                           names               
##     [1]  1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
##     [2]  1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
##     [3]   957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
##     [4]  1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
##     [5]   774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
##     ...   ... ...
## [46246]   162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247]  1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248]  1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249]   870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250]  1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
## remove fasta file
system(paste("rm ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"),sep=""))
# special function for GOseq
# Over-representation analysis of a gene list against custom categories with goseq.
#   genelist             : IDs of the genes of interest (matched against names(Br_cdna))
#   padjust              : cut-off applied to the BH-adjusted over-representation p-value
#   custom.category.list : gene -> category list passed to goseq() as gene2cat
#   Br_cdna              : sequences; their names define the gene universe and
#                          nchar() of each gives the bias data for nullp()
# Returns the rows of the goseq table with adjusted p-value below padjust, or the
# string "no enriched GO" when the first row is above the cut-off.
GOseq.customcategory.ORA<-function(genelist,padjust=0.05,custom.category.list=Bra.v3.0_cdna.list,Br_cdna=Bra.v3.0_cdna) {
  # bias data, one value per gene
  gene.length <- nchar(Br_cdna)
  names(gene.length) <- names(Br_cdna)
  # 1 for genes in genelist, 0 for all others
  in.genelist <- (names(gene.length) %in% genelist) * 1
  names(in.genelist) <- names(gene.length)
  pwf <- nullp(in.genelist, bias.data = gene.length)
  # note kept from the earlier version: the gene2cat format changed in newer goseq versions
  GO.pval <- goseq(pwf, gene2cat = custom.category.list, use_genes_without_cat = TRUE)
  GO.pval$over_represented_padjust <- p.adjust(GO.pval$over_represented_pvalue, method = "BH")
  # only the first row is inspected to decide whether anything is enriched
  if (GO.pval$over_represented_padjust[1] > padjust) {
    return("no enriched GO")
  }
  enriched.GO <- GO.pval[GO.pval$over_represented_padjust < padjust, ]
  print("enriched.GO is")
  print(enriched.GO)
  return(enriched.GO)
}
gene.up<-twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(logFC>0&FDR<0.05) %>% dplyr::select(genes) %>% as_vector()
gene.down<-twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(logFC<0&FDR<0.05) %>% dplyr::select(genes) %>% as_vector()

enriched.GO.up<-GOseq.customcategory.ORA(genelist=gene.up) # needs to wait for Bra.v3.0_cdna.list.Rdata ready in Whitney
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...

## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 13 lightyellow            5.053465e-08                1.0000000          7
## 21         tan            1.651936e-04                0.9999827          6
##    numInCat over_represented_padjust
## 13       32             1.162297e-06
## 21       73             1.899727e-03
enriched.GO.down<-GOseq.customcategory.ORA(genelist=gene.down)
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...

## [1] "enriched.GO is"
##      category over_represented_pvalue under_represented_pvalue numDEInCat
## 23     yellow           4.518946e-176                1.0000000        149
## 16       pink            2.553492e-32                1.0000000         40
## 12 lightgreen            1.895763e-05                0.9999984          7
## 17     purple            1.741131e-03                0.9995694          9
##    numInCat over_represented_padjust
## 23      242            1.039358e-174
## 16      128             2.936516e-31
## 12       33             1.453418e-04
## 17      101             1.001150e-02

expression pattern of module/genes of interest (logFC of soil treatment)

n<-1
gene.up.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.up,modules==enriched.GO.up$category[n])
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes=gene.up.category[1:10,]) # works
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
## Error: Faceting variables must have at least one value

# expression pattern of module/genes of interest (normalized value)

# scaling expression data (each gene is centred and scaled across samples)
# The helper column cv is not a sample: it is left out of the scaling and
# carried over unchanged, so later steps that drop it (select(-cv)) still work.
sample.columns <- setdiff(names(cpm.timecourse.v3.0), c("transcript_ID", "cv"))
cpm.timecourse.v3.0.scale <- t(scale(t(cpm.timecourse.v3.0[, sample.columns, drop = FALSE]))) %>%
  as_tibble() %>%
  bind_cols(data.frame(transcript_ID = cpm.timecourse.v3.0$transcript_ID), .)
if ("cv" %in% names(cpm.timecourse.v3.0)) {
  cpm.timecourse.v3.0.scale$cv <- cpm.timecourse.v3.0$cv
}

# up-regulated DEGs that belong to the n-th module enriched among up-regulated genes
gene.up.category <- cpm.timecourse.v3.0.log.largeCV.modules %>%
  dplyr::filter(transcript_ID %in% gene.up, modules == enriched.GO.up$category[n])
# down-regulated DEGs are looked up in the modules enriched among down-regulated
# genes (enriched.GO.down), not in the up-regulated result
gene.down.category <- cpm.timecourse.v3.0.log.largeCV.modules %>%
  dplyr::filter(transcript_ID %in% gene.down, modules == enriched.GO.down$category[n])

expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(data=cpm.timecourse.v3.0.scale,target.genes=gene.up.category[1,]) 
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
## Error: Faceting variables must have at least one value

using map-column method???

input<-tribble(
  ~target.genes,~data,~f,
  gene.up.category[1:10,],cpm.timecourse.v3.0.scale,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,
  gene.down.category[1:10,],cpm.timecourse.v3.0.scale,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2
)
input2<-tribble(
  ~f,~param,
  expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,list(target.genes=gene.up.category[1:10,],data=cpm.timecourse.v3.0.scale),
  expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,list(target.genes=gene.down.category[1:10,],data=cpm.timecourse.v3.0.scale)
)

test<-input2 %>% mutate(output=invoke_map(f,param)) # works, but parameters are not visible
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector

## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# how about to use map2?
## an example
params<-tribble(
    ~mean,~sd,~n,
  5,1,1,
  10,5,3,
  -3,10,5
)
params %>% pmap(rnorm) 
## [[1]]
## [1] 4.401924
## 
## [[2]]
## [1] 13.629514  8.742931 10.435519
## 
## [[3]]
## [1] -19.4951056 -21.3628010 -17.8922653   0.3945985   4.3824799
# 
input3<-tribble(
  ~target.genes,~data,~title,
  gene.up.category[1:10,],cpm.timecourse.v3.0.scale,"2-afternoon soil up",
  gene.down.category[1:10,],cpm.timecourse.v3.0.scale,"2-afternoon soil down",
)
#input3 %>% pmap(expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2) -> expression.pattern
# 
expression.pattern <- input3 %>% mutate(plot=pmap(.,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector

## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
expression.pattern$plot[1] # plot
## [[1]]
## Error: Faceting variables must have at least one value

#input3 %>% mutate(plot=invoke_map(~expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2)) # errors

calculate fold change (trial with six genes) (run only once)

temp.abs<-cpm.timecourse.v3.0.log %>% head() %>%
  gather(sample,value,-transcript_ID) %>%
  mutate(abs.value=2^value) %>%
  inner_join(sample.description.timecourse, by="sample") %>% 
  split(.$soil_trt) 
mean.by.soil<-function(x) {group_by(x, group,transcript_ID) %>% summarize(mean=mean(abs.value))}
temp.abs.mean<-temp.abs %>% map(.,mean.by.soil) 
cpm.timecourse.v3.0.logFC<-tibble(transcript_ID=temp.abs.mean[["SBC_OLD"]]$transcript_ID,group=temp.abs.mean[["SBC_OLD"]]$group,logFC=log(temp.abs.mean[["SBC_OLD"]]$mean/temp.abs.mean[["ATM_BLANK"]]$mean)) %>% left_join(sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time"),by="group")
# check logFC range
range(cpm.timecourse.v3.0.logFC$logFC) #(Jan 31, 2020)

calculate fold change (full genes) (run only once)

temp.abs<-cpm.timecourse.v3.0.log  %>%
  gather(sample,value,-transcript_ID) %>% mutate(abs.value=2^value) %>%
  inner_join(sample.description.timecourse, by="sample") %>% 
  split(.$soil_trt) 
# helper: mean of abs.value for every group / transcript_ID combination of x
mean.by.soil <- function(x) {
  x %>%
    group_by(group, transcript_ID) %>%
    summarize(mean = mean(abs.value))
}
# calculating absolute value mean
temp.abs.mean<-temp.abs %>% map(.,mean.by.soil) 
# making summary tibble
temp2<-sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time")
sample.description.timecourse.logFC<-temp2[!duplicated(temp2),]
# add sample info
# logFC is the log of the SBC_OLD mean divided by the ATM_BLANK mean, computed row by row;
# this assumes both summary tables list group/transcript_ID in the same row order — TODO confirm.
# NOTE(review): log() is the natural log, whereas abs.value was obtained as 2^value;
# confirm whether log2() was intended for the fold change.
cpm.timecourse.v3.0.logFC<-tibble(transcript_ID=temp.abs.mean[["SBC_OLD"]]$transcript_ID,group=temp.abs.mean[["SBC_OLD"]]$group,logFC=log(temp.abs.mean[["SBC_OLD"]]$mean/temp.abs.mean[["ATM_BLANK"]]$mean)) %>% left_join(sample.description.timecourse.logFC,by="group")
# check
dim(cpm.timecourse.v3.0.logFC)
# check frequency distribution
a<-hist(cpm.timecourse.v3.0.logFC$logFC) # most of them are small
a
# what are genes with super high logFC?
high.FC.genes<-cpm.timecourse.v3.0.logFC %>% filter(abs(logFC)>5) %>% dplyr::select(transcript_ID) 
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = high.FC.genes[1:10,])
#
# Same as addAnno, but for tables that already carry the gene ID in a
# transcript_ID column: joins the annotation columns, writes
# ../output/<object name>.v3.0anno.csv and returns the annotated table.
addAnno2 <- function(DGE) {
  # name of the object supplied by the caller; used for the output file name
  object.name <- deparse(substitute(DGE))
  temp <- left_join(DGE, Br.v3.0anno.At.BLAST.highscore, by = c("transcript_ID" = "name")) %>%
    dplyr::select(transcript_ID, names(DGE), AGI, At_symbol, At_short_description, perc_ID)
  print(object.name)
  output.file <- file.path("..", "output", paste0(object.name, ".v3.0anno.csv"))
  write_csv(temp, path = output.file)
  return(temp)
}
# annotate the high-logFC rows (addAnno2 also writes high.FC.genes.v3.0anno.csv to ../output)
addAnno2(high.FC.genes)
#
dim(cpm.timecourse.v3.0.logFC) #[1] 1110000       5

# the plain csv was too large (306 M), so only the gzip-compressed file (12.3 M) is written
#write_csv(cpm.timecourse.v3.0.logFC,path="../output/cpm.timecourse.v3.0.logFC.csv")
write_csv(
  cpm.timecourse.v3.0.logFC,
  path = file.path("..", "output", "cpm.timecourse.v3.0.logFC.csv.gz")
)

logFC expression pattern (only two afternoon)

# reload the saved logFC table (gzip-compressed csv)
cpm.timecourse.v3.0.logFC <- read_csv(
  file = file.path("..", "output", "cpm.timecourse.v3.0.logFC.csv.gz")
)
## Parsed with column specification:
## cols(
##   transcript_ID = col_character(),
##   group = col_character(),
##   logFC = col_double(),
##   sampling_day = col_character(),
##   sampling_time = col_character()
## )
# gene set used for the test call below (gene.up is defined outside this section)
target.genes<-gene.up
# The definition below is commented out here; the call at the end of this chunk
# therefore relies on a definition of the same function made elsewhere
# (presumably earlier in the file or a sourced script -- confirm).
# As written, the commented-out version replaces NA with 0, keeps the rows whose
# transcript_ID is in target.genes, and draws a boxplot of logFC per sampling_day.
# expression.pattern.Br.graph.timecourse.v3.0annotation.logFC<-function(data=cpm.timecourse.v3.0.logFC,target.genes,title="",subset.data="only_two_afternoon"){
# #print(paste("data is",data[1:10,]))
# #print(paste("tissue.type is root"))
# data[is.na(data)] <- 0 #
# data.temp<-data  %>% dplyr::filter(transcript_ID %in% target.genes) 
# 
# # if (2-afternoon=TRUE)
# if (subset.data=="only_two_afternoon") {
# p<-data.temp %>% ggplot(aes(x=sampling_day,y=logFC))  + 
#   geom_boxplot(alpha = 0.5)  + 
#   theme_bw() +
#   theme(strip.text.y=element_text(angle=0),axis.text.x=element_text(angle=90)) +
#   theme(legend.position="bottom") + labs(title=title)
# p
# } else {print("Define subset.data other than only_two_afternoon.")}
# }
# test the function
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")

J meeting (Jan 28, 2020)

K-means clustering

# Scaled expression of the trtlive DEGs (FDR < 0.05), restricted to 2_afternoon samples
# 1) drop the cv column from the scaled expression table
cpm.timecourse.v3.0.scale.twoafternoon.DEG <- dplyr::select(cpm.timecourse.v3.0.scale, -cv)
# 2) keep only transcripts that are significant DEGs
cpm.timecourse.v3.0.scale.twoafternoon.DEG <- inner_join(
  cpm.timecourse.v3.0.scale.twoafternoon.DEG,
  dplyr::select(filter(twoafternoon.trtlive.DEGs.all.v3.0anno, FDR < 0.05), genes),
  by = c(transcript_ID = "genes")
)
# 3) long format (every column except the first becomes sample/value), add sample metadata
cpm.timecourse.v3.0.scale.twoafternoon.DEG <- gather(cpm.timecourse.v3.0.scale.twoafternoon.DEG, sample, value, -1)
cpm.timecourse.v3.0.scale.twoafternoon.DEG <- inner_join(
  cpm.timecourse.v3.0.scale.twoafternoon.DEG,
  sample.description.timecourse,
  by = "sample"
)
# 4) keep the 2_afternoon time point only
cpm.timecourse.v3.0.scale.twoafternoon.DEG <- filter(
  cpm.timecourse.v3.0.scale.twoafternoon.DEG,
  sampling_time == "2_afternoon"
)
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread: one row per transcript, one column per sample.
# No third argument is passed to spread(): a positional `-1` there is taken as
# `fill`, which would silently turn any missing transcript/sample cell into -1
# instead of NA before clustering.
cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread <- cpm.timecourse.v3.0.scale.twoafternoon.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
dim(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread) # [1] 1442  97
## [1] 1442   97
# calculate wss (within-groups sum of squares) for 1..20 clusters (elbow plot)
# k = 1: total sum of squares over all sample columns
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1],2,var))
# k = 2..20: iter.max is raised above the kmeans() default of 10, which gave
# "did not converge in 10 iterations".
# Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
for (i in 2:20) wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1],
                                     centers=i,iter.max = 20)$withinss)
plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")

Let’s perform the actual clustering using K=8:

# K-means (k = 8) on the scaled expression matrix (first column, transcript_ID, dropped)
set.seed(20)
kClust.8 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.8 <- kClust.8$cluster
# number of genes per cluster: n() counts the rows of each cluster
# (sum(cluster) would add up the cluster ids, i.e. size multiplied by the cluster number)
cluster.8.num<-tibble(cluster=kClusters.8) %>% group_by(cluster) %>% summarize(n=n()) 
# cluster id as character so it can be joined to the centroid column names
cluster.8.num$cluster<-as.character(cluster.8.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ (aka centroids), using a function that finds the centroid of cluster i:

# Centroid (column means) of the rows of `dat` that belong to cluster `i`.
#   i        : cluster label (compared with `clusters` using ==)
#   dat      : numeric matrix / data frame / tibble, one row per clustered item
#   clusters : cluster label of every row of `dat`
# Returns a named numeric vector with one mean per column of `dat`.
clust.centroid = function(i, dat, clusters) {
  ind = (clusters == i)
  # drop = FALSE keeps two dimensions when a cluster has a single member and
  # `dat` is a matrix; otherwise colMeans() fails on the dropped vector
  colMeans(dat[ind, , drop = FALSE])
}
# one centroid column per cluster label; rows are the sample columns of the input
kClustcentroids.8 <- sapply(
  X = levels(factor(kClusters.8)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1],
  clusters = kClusters.8
)

Plotting the centroids to see how they behave: tidyverse version

# centroid matrix -> long table (sample, cluster, value)
data.sample <- as_tibble(kClustcentroids.8, rownames = "sample")
data.sample <- gather(data.sample, cluster, value, -1)
# add the sample description and the per-cluster n
data.sample <- inner_join(data.sample, sample.description.timecourse, by = "sample")
data.sample <- inner_join(data.sample, cluster.8.num, by = "cluster")
# facet label: cluster id, with n in parentheses on a second line
data.sample <- mutate(
  data.sample,
  cluster.n = glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n)
)
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value per group x cluster, used for the line layer.
# The means are joined back to the per-sample metadata and then reduced to the
# first row of every group.cluster key with distinct(); this replaces a
# de-duplication that indexed a hard-coded 1:768 row range and therefore only
# matched one particular number of samples x clusters.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot: points are the per-sample centroid values, the line layer uses the
# group x cluster means in data.group; facets are cluster (rows) by sampling day (columns)
p8 <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(
    axis.text = element_text(angle = 90),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    title = "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p8

ggsave(filename = "../output/Twoafternoon.DEG.Kmean.8clusters.png", plot = p8, width = 11, height = 8)

Let’s perform the actual clustering using K=5:

# K-means (k = 5) on the scaled expression matrix (first column, transcript_ID, dropped)
set.seed(20)
kClust.5 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], centers=5, nstart = 1000, iter.max = 20)
kClusters.5 <- kClust.5$cluster
# number of genes per cluster: n() counts the rows of each cluster
# (sum(cluster) would add up the cluster ids, i.e. size multiplied by the cluster number)
cluster.5.num<-tibble(cluster=kClusters.5) %>% group_by(cluster) %>% summarize(n=n())
# cluster id as character so it can be joined to the centroid column names
cluster.5.num$cluster<-as.character(cluster.5.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids:

# one centroid column per cluster label; rows are the sample columns of the input
kClustcentroids.5 <- sapply(
  X = levels(factor(kClusters.5)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[, -1],
  clusters = kClusters.5
)

Plotting the centroids to see how they behave: tidyverse version

# centroid matrix -> long table (sample, cluster, value)
data.sample <- as_tibble(kClustcentroids.5, rownames = "sample")
data.sample <- gather(data.sample, cluster, value, -1)
# add the sample description and the per-cluster n
data.sample <- inner_join(data.sample, sample.description.timecourse, by = "sample")
data.sample <- inner_join(data.sample, cluster.5.num, by = "cluster")
# facet label: cluster id, with n in parentheses on a second line
data.sample <- mutate(
  data.sample,
  cluster.n = glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n)
)
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value per group x cluster, used for the line layer.
# The means are joined back to the per-sample metadata and then reduced to the
# first row of every group.cluster key with distinct(); this replaces a
# de-duplication that indexed a hard-coded 1:480 row range.
# Open question kept from the original note: "only cluster 1... why???"
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot: points are the per-sample centroid values, the line layer uses the
# group x cluster means in data.group; facets are cluster (rows) by sampling day (columns)
p5 <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(
    axis.text = element_text(angle = 90),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    title = "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p5

ggsave(filename = "../output/Twoafternoon.DEG.Kmean.5clusters.png", plot = p5, width = 11, height = 8)

logFC K-means

# logFC pattern of the gene.up set
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(
  target.genes = gene.up,
  subset.data = "only_two_afternoon"
)

# logFC of the trtlive DEGs (FDR < 0.05), restricted to the 2_afternoon time point
cpm.timecourse.v3.0.logFC.twoafternoon.DEG <- inner_join(
  cpm.timecourse.v3.0.logFC,
  dplyr::select(filter(twoafternoon.trtlive.DEGs.all.v3.0anno, FDR < 0.05), genes),
  by = c(transcript_ID = "genes")
)
cpm.timecourse.v3.0.logFC.twoafternoon.DEG <- filter(
  cpm.timecourse.v3.0.logFC.twoafternoon.DEG,
  sampling_time == "2_afternoon"
)
# spread: one row per transcript, one column per group.
# No third argument is passed to spread(): a positional `-1` there is taken as
# `fill`, which would silently turn any missing transcript/group cell into -1
# instead of NA before clustering.
cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread <- cpm.timecourse.v3.0.logFC.twoafternoon.DEG %>%
  dplyr::select(transcript_ID, group, logFC) %>%
  spread(group, logFC)
dim(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread) # [1] 1442    9
## [1] 1442    9
# within-groups sum of squares for 1..20 clusters (elbow plot)
# k = 1: (n - 1) times the summed column variances
wss.logFC <- (nrow(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1]) - 1) *
  sum(apply(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1], 2, var))
# k = 2..20: iter.max is set to 20 because the default (10) gave
# "did not converge in 10 iterations".
# Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
for (i in 2:20) {
  wss.logFC[i] <- sum(
    kmeans(
      x = cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1],
      centers = i,
      iter.max = 20
    )$withinss
  )
}
plot(1:20, wss.logFC, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")

# Let’s perform the actual clustering using K=5:

# K-means (k = 5) on the logFC matrix (first column, transcript_ID, dropped)
set.seed(20)
kClust.logFC.5 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], centers=5, nstart = 1000, iter.max = 20)
kClusters.logFC.5 <- kClust.logFC.5$cluster
# number of genes per cluster: n() counts the rows of each cluster
# (sum(cluster) would add up the cluster ids, i.e. size multiplied by the cluster number).
# Note: this reuses the name cluster.5.num, replacing any earlier table of that name.
cluster.5.num<-tibble(cluster=kClusters.logFC.5) %>% group_by(cluster) %>% summarize(n=n())
# cluster id as character so it can be joined to the centroid column names
cluster.5.num$cluster<-as.character(cluster.5.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids:

# one centroid column per cluster label; rows are the group columns of the input
kClustcentroids.logFC.5 <- sapply(
  X = levels(factor(kClusters.logFC.5)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1],
  clusters = kClusters.logFC.5
)

Plotting the centroids to see how they behave: tidyverse version

# making sample.description.timecourse.logFC: one metadata row per group
temp2<-sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time")
sample.description.timecourse.logFC<-temp2[!duplicated(temp2),]
# plot: logFC centroid of every cluster per sampling day, one facet row per cluster.
# The y axis is labelled logFC because the clustered values are logFC, not scaled expression.
p.logFC.5<-kClustcentroids.logFC.5 %>% as_tibble(rownames="group") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse.logFC,by="group") %>% 
  inner_join(cluster.5.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) ) %>%
  ggplot(aes(x=sampling_day,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") +
  facet_grid(cluster.n~.) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",color = "Cluster",y="logFC") 
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.5

ggsave(p.logFC.5,file="../output/Twoafternoon.DEG.logFC.Kmean.5clusters.png",width=11,height=8)

Let’s perform the actual clustering using K=8:

# K-means (k = 8) on the logFC matrix (first column, transcript_ID, dropped)
set.seed(20)
kClust.logFC.8 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.logFC.8 <- kClust.logFC.8$cluster
# number of genes per cluster: n() counts the rows of each cluster
# (sum(cluster) would add up the cluster ids, i.e. size multiplied by the cluster number).
# Note: this reuses the name cluster.8.num, replacing any earlier table of that name.
cluster.8.num<-tibble(cluster=kClusters.logFC.8) %>% group_by(cluster) %>% summarize(n=n())
# cluster id as character so it can be joined to the centroid column names
cluster.8.num$cluster<-as.character(cluster.8.num$cluster) # classic way

Now we can calculate the cluster ‘cores’ aka centroids:

# one centroid column per cluster label; rows are the group columns of the input
kClustcentroids.logFC.8 <- sapply(
  X = levels(factor(kClusters.logFC.8)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[, -1],
  clusters = kClusters.logFC.8
)

Plotting the centroids to see how they behave: tidyverse version

# plot: logFC centroid of every cluster per sampling day, one facet row per cluster.
# The y axis is labelled logFC because the clustered values are logFC, not scaled expression.
p.logFC.8<-kClustcentroids.logFC.8 %>% as_tibble(rownames="group") %>% 
  gather(cluster,value,-1) %>% 
  inner_join(sample.description.timecourse.logFC,by="group") %>% 
  inner_join(cluster.8.num,by="cluster") %>%
    mutate(cluster.n=glue('{cluster2} \n({n2})',
                          cluster2=cluster,
                          n2=n) ) %>%
  ggplot(aes(x=sampling_day,y=value, group=cluster, colour=as.factor(cluster))) + 
  geom_point() + geom_hline(yintercept=0,color="red") +
  facet_grid(cluster.n~.) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
  labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="logFC") 
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.8

ggsave(p.logFC.8,file="../output/Twoafternoon.DEG.logFC.Kmean.8clusters.png",width=11,height=8)

conclusion

load Brgo.v3.0anno.Atgoslim.BP.list

load(file.path("..","Annotation_copy","output","v3.0annotation","Brgo.v3.0anno.Atgoslim.BP.list.Rdata"))

load GO.ORA function

GOseq function for Brassica rapa (v3.0)

# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
# for ggplot heatmap
## uncompress gz file
# Paths are shell-quoted so directories containing spaces or shell metacharacters
# do not break the command, and a failed gunzip stops the script instead of
# letting the next line read a missing or partial fasta file.
gunzip.status <- system(paste(
  "gunzip -c",
  shQuote(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.gz")),
  ">",
  shQuote(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"))
))
if (gunzip.status != 0) {
  stop("gunzip failed for Brapa_genome_v3.0_cds.gz (exit status ", gunzip.status, ")")
}
## read cDNA fasta file 
Bra.v3.0_cdna<-readDNAStringSet(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
##   A DNAStringSet instance of length 46250
##         width seq                                           names               
##     [1]  1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
##     [2]  1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
##     [3]   957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
##     [4]  1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
##     [5]   774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
##     ...   ... ...
## [46246]   162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247]  1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248]  1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249]   870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250]  1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
## remove fasta file
# unlink() deletes the temporary uncompressed fasta without going through a shell,
# so it works regardless of platform or special characters in the path
unlink(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"))
# GOseq function
# GO over-representation analysis of a gene list with goseq, correcting for CDS length.
#   genelist             : character vector of gene ids (matched against names(Br_cdna))
#   padjust              : cut-off applied to the BH-adjusted over-representation p-value
#   ontology             : "BP" or "CC"; any other value selects "MF"
#   custom.category.list : gene-to-category mapping passed to goseq() as gene2cat
#   Br_cdna              : sequence set whose names are gene ids; nchar() of it is the length bias
# Returns the enriched categories as a data frame (goseq columns plus
# over_represented_padjust, Term and Definition), or the string "no enriched GO"
# when no category passes the cut-off.
GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA<-function(genelist,padjust=0.05,ontology="BP",custom.category.list=Brgo.v3.0anno.Atgoslim.BP.list,Br_cdna=Bra.v3.0_cdna) {
  # length of every reference sequence, named by gene id
  bias<-nchar(Br_cdna)
  names(bias)<-names(Br_cdna)
  # 1 for genes in genelist, 0 for all other reference genes
  TF<-(names(bias) %in% genelist)*1
  names(TF)<-names(bias)
  pwf<-nullp(TF,bias.data=bias)
  GO.pval <- goseq(pwf,gene2cat=custom.category.list,use_genes_without_cat=TRUE) # format became different in new goseq version (021111). Does not work (042716)

  # keep one ontology; rows with a missing ontology are dropped (which() ignores NA)
  target.ontology <- if (ontology %in% c("BP","CC")) ontology else "MF"
  GO.pval2 <- GO.pval[which(GO.pval$ontology==target.ontology),]

  GO.pval2$over_represented_padjust<-p.adjust(GO.pval2$over_represented_pvalue,method="BH")
  # select on the adjusted p-value of every row, so an empty table, NA p-values
  # or a value exactly equal to the cut-off cannot reach the annotation loop
  enriched.GO<-GO.pval2[which(GO.pval2$over_represented_padjust<padjust),]
  if(nrow(enriched.GO)==0) return("no enriched GO")

  print("enriched.GO is")
  print(enriched.GO)

  ## write Term and Definition
  # the columns are created at full length first: growing them row by row fails
  # as soon as an early row is skipped
  enriched.GO$Term<-NA_character_
  enriched.GO$Definition<-NA_character_
  for(i in seq_len(nrow(enriched.GO))) {
    # ifnotfound=NA: a category id absent from GO.db is left as NA instead of raising an error
    go.record<-mget(enriched.GO[i,"category"],GOTERM,ifnotfound=NA)[[1]]
    if(identical(go.record,NA)) next
    enriched.GO$Term[i]<-Term(go.record)
    enriched.GO$Definition[i]<-Definition(go.record)
  }
  return(enriched.GO)
}
#
# show the first records of the CDS set (names are the gene ids used for the length bias)
head(Bra.v3.0_cdna)
##   A DNAStringSet instance of length 6
##     width seq                                               names               
## [1]  1254 ATGCGACCACCGGGTGTTGTTTC...CTGAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2]  1668 ATGCCAGCAATGCATGCCGTTTT...GTAGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3]   957 ATGATGCTTCTCGTTCATACCCG...GGAACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4]  1299 ATGAGTCGTCTTCTCCTTGCTCA...GTGGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5]   774 ATGGATTCTGGGCTTCAGCATCT...AAGGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## [6]  3327 ATGGCGTCCACTCCTCCTCAAAA...GCGGTGGGTTTCAATTTCCTTGA BraA01g000060.3C
# length(bias) # 44239 > 45019 where the bias come from?
#  bias.data vector must have the same length as DEgenes vector!

GO ORA of each cluster

# transcript ids with their K=8 cluster, split into one table per cluster,
# then GO over-representation analysis of every cluster's transcripts
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread$transcript_ID,
  cluster = kClusters.8
)
temp <- split(temp, temp$cluster)
temp <- map(temp, function(cluster.genes) {
  GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist = cluster.genes$transcript_ID)
})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            1.732470e-08                1.0000000         18
## 423  GO:0006468            1.855801e-08                1.0000000         29
## 2750 GO:0050832            1.926103e-08                1.0000000         14
## 1858 GO:0031348            2.494123e-05                0.9999987          5
## 638  GO:0006952            3.373325e-05                0.9999904         18
## 890  GO:0009611            4.127141e-05                0.9999930         10
## 3442 GO:1900067            5.827127e-05                0.9999999          2
## 1000 GO:0009814            6.360706e-05                0.9999976          4
## 921  GO:0009651            6.434943e-05                0.9999832         15
## 3242 GO:0080119            1.300857e-04                0.9999973          3
##      numInCat                                           term ontology
## 2253      726                  defense response to bacterium       BP
## 423      1484                        protein phosphorylation       BP
## 2750      469                     defense response to fungus       BP
## 1858       64        negative regulation of defense response       BP
## 638      1165                               defense response       BP
## 890       419                           response to wounding       BP
## 3442        3 regulation of cellular response to alkaline pH       BP
## 1000       49     defense response, incompatible interaction       BP
## 921      1045                        response to salt stress       BP
## 3242       17                           ER body organization       BP
##      over_represented_padjust
## 2253             2.432668e-05
## 423              2.432668e-05
## 2750             2.432668e-05
## 1858             2.362558e-02
## 638              2.556305e-02
## 890              2.606289e-02
## 3442             2.709111e-02
## 1000             2.709111e-02
## 921              2.709111e-02
## 3242             4.928947e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 390 GO:0006412             3.80768e-07                        1         11
##     numInCat        term ontology over_represented_padjust
## 390      715 translation       BP               0.00144273
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            3.409045e-11                        1          7
## 2259 GO:0042761            1.726215e-08                        1          5
## 1181 GO:0010143            1.931256e-06                        1          4
##      numInCat                                            term ontology
## 1291       41                    suberin biosynthetic process       BP
## 2259       30 very long-chain fatty acid biosynthetic process       BP
## 1181       29                      cutin biosynthetic process       BP
##      over_represented_padjust
## 1291             1.291687e-07
## 2259             3.270314e-05
## 1181             2.439177e-03
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200            3.659965e-11                1.0000000         17
## 2253 GO:0042742            2.432098e-08                1.0000000         23
## 1174 GO:0010120            7.293180e-08                1.0000000          6
## 1166 GO:0010112            6.780014e-07                1.0000000          5
## 638  GO:0006952            8.277917e-07                0.9999998         27
## 899  GO:0009626            2.954187e-06                0.9999997          9
## 894  GO:0009617            3.846114e-06                0.9999994         11
## 890  GO:0009611            6.457569e-06                0.9999987         14
## 855  GO:0009409            1.517719e-05                0.9999961         17
## 944  GO:0009697            2.834944e-05                0.9999992          4
## 2750 GO:0050832            3.456585e-05                0.9999924         13
## 1205 GO:0010193            5.358930e-05                0.9999968          5
## 960  GO:0009737            7.818971e-05                0.9999759         18
## 187  GO:0002237            8.857740e-05                0.9999917          6
## 185  GO:0002229            1.515927e-04                0.9999802          7
##      numInCat                                       term ontology
## 1210      286                         response to chitin       BP
## 2253      726              defense response to bacterium       BP
## 1174       27             camalexin biosynthetic process       BP
## 1166       22 regulation of systemic acquired resistance       BP
## 638      1165                           defense response       BP
## 899       140         plant-type hypersensitive response       BP
## 894       241                      response to bacterium       BP
## 890       419                       response to wounding       BP
## 855       696                           response to cold       BP
## 944        21        salicylic acid biosynthetic process       BP
## 2750      469                 defense response to fungus       BP
## 1205       54                          response to ozone       BP
## 960       832                  response to abscisic acid       BP
## 187        88   response to molecule of bacterial origin       BP
## 185       126              defense response to oomycetes       BP
##      over_represented_padjust
## 1210             1.386761e-07
## 2253             4.607609e-05
## 1174             9.211286e-05
## 1166             6.273006e-04
## 638              6.273006e-04
## 899              1.865569e-03
## 894              2.081846e-03
## 890              3.058466e-03
## 855              6.389597e-03
## 944              1.074160e-02
## 2750             1.190636e-02
## 1205             1.692082e-02
## 960              2.278929e-02
## 187              2.397284e-02
## 185              3.829231e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732            8.264952e-07                1.0000000          5
## 3114 GO:0071369            1.490655e-06                1.0000000          5
## 3094 GO:0071281            7.588090e-06                0.9999997          5
## 254  GO:0006096            9.647819e-06                0.9999994          6
##      numInCat                                   term ontology
## 3164       52      cellular response to nitric oxide       BP
## 3114       62 cellular response to ethylene stimulus       BP
## 3094       77          cellular response to iron ion       BP
## 254       138                     glycolytic process       BP
##      over_represented_padjust
## 3164              0.002824047
## 3114              0.002824047
## 3094              0.009138897
## 254               0.009138897
# flatten the per-cluster ORA results into one table (cluster id in `cluster`) and save it
write_csv(
  unnest(enframe(temp, name = "cluster"), value),
  path = file.path("..", "output", "twoafternoon.trtsoil.DEG.Kmeans.cluster.csv")
)

any trt DEGs (two afternoon) clustering and cluster ORAs

K-means clustering

# Scaled expression of the any.trtlive DEGs (FDR < 0.05), restricted to 2_afternoon samples
# 1) drop the cv column from the scaled expression table
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG <- dplyr::select(cpm.timecourse.v3.0.scale, -cv)
# 2) keep only transcripts that are significant DEGs
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG <- inner_join(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG,
  dplyr::select(filter(twoafternoon.any.trtlive.DEGs.all.v3.0anno, FDR < 0.05), genes),
  by = c(transcript_ID = "genes")
)
# 3) long format (every column except the first becomes sample/value), add sample metadata
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG <- gather(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG, sample, value, -1
)
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG <- inner_join(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG,
  sample.description.timecourse,
  by = "sample"
)
# 4) keep the 2_afternoon time point only
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG <- filter(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG,
  sampling_time == "2_afternoon"
)
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread: one row per transcript, one column per sample.
# No third argument is passed to spread(): a positional `-1` there is taken as
# `fill`, which would silently turn any missing transcript/sample cell into -1
# instead of NA before clustering.
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread <- cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
dim(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread) # [1] 2178   97
## [1] 2178   97
# calculate wss (within-groups sum of squares) for 1..20 clusters (elbow plot)
# k = 1: total sum of squares over all sample columns
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1],2,var))
# k = 2..20: iter.max is raised above the kmeans() default of 10, which gave
# "did not converge in 10 iterations".
# Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
for (i in 2:20) wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1],
                                     centers=i,iter.max = 20)$withinss)
plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")

Let’s perform the actual clustering using K=5:

# K-means (k = 5) on the scaled expression matrix (first column, transcript_ID, dropped)
set.seed(20)
kClust.any.trtlive.5 <- kmeans(
  x = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 5,
  nstart = 1000,
  iter.max = 20
)
kClusters.any.trtlive.5 <- kClust.any.trtlive.5[["cluster"]]
# number of genes per cluster, with the cluster id stored as character
cluster.any.trtlive.5.num <- count(tibble(cluster = kClusters.any.trtlive.5), cluster)
cluster.any.trtlive.5.num <- mutate(cluster.any.trtlive.5.num, cluster = as.character(cluster))
cluster.any.trtlive.5.num

Now we can calculate the cluster ‘cores’ aka centroids: # find centroid in cluster

# one centroid column per cluster label; rows are the sample columns of the input
kClustcentroids.any.trtlive.5 <- sapply(
  X = levels(factor(kClusters.any.trtlive.5)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  clusters = kClusters.any.trtlive.5
)
head(kClustcentroids.any.trtlive.5)
##                                   1           2            3           4
## 1a1_q_002_S1_R1_001    -0.370624345 -0.27016316 -0.781291105 -0.09145448
## 1a3_q_004_S3_R1_001     0.586129826 -0.45516870 -0.082928903 -0.31258537
## 1a7_q_007_d8_S7_R1_001  0.603390134 -0.06622119 -0.118731825 -0.06524704
## 1a8_q_008_d8_S8_R1_001  0.513232164 -0.44547237 -0.347272642 -0.23834981
## 1c6_q_028_S22_R1_001   -0.008042207 -0.39487064  0.006183179 -0.18385917
## 1d3_q_038_S27_R1_001    0.423676033 -0.18654295 -0.166417100 -0.15567393
##                                  5
## 1a1_q_002_S1_R1_001     0.48593350
## 1a3_q_004_S3_R1_001    -0.04775671
## 1a7_q_007_d8_S7_R1_001 -0.02718576
## 1a8_q_008_d8_S8_R1_001  0.19646882
## 1c6_q_028_S22_R1_001    0.18640730
## 1d3_q_038_S27_R1_001    0.01866725

Plotting the centroids to see how they behave: tidyverse version

# centroid matrix -> long table (sample, cluster, value)
data.sample <- as_tibble(kClustcentroids.any.trtlive.5, rownames = "sample")
data.sample <- gather(data.sample, cluster, value, -1)
# add the sample description and the per-cluster gene count
data.sample <- inner_join(data.sample, sample.description.timecourse, by = "sample")
data.sample <- inner_join(data.sample, cluster.any.trtlive.5.num, by = "cluster")
# facet label: cluster id, with n in parentheses on a second line
data.sample <- mutate(
  data.sample,
  cluster.n = glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n)
)
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value per group x cluster, used for the line layer.
  ### under construction ####
# The means are joined back to the per-sample metadata and then reduced to the
# first row of every group.cluster key with distinct(); this replaces a
# de-duplication that indexed a hard-coded 1:768 row range, a size that does not
# correspond to five clusters.
data.group <- data.sample %>%
  unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
  group_by(group.cluster) %>%
  summarize(group.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
      dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "group.cluster"
  ) %>%
  distinct(group.cluster, .keep_all = TRUE)
# plot: points are the per-sample centroid values, the line layer uses the
# group x cluster means in data.group; facets are cluster (rows) by sampling day (columns)
p5.any.trtlive <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(
    axis.text = element_text(angle = 90),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    title = "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p5.any.trtlive

ggsave(
  filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.5clusters.png",
  plot = p5.any.trtlive,
  width = 11,
  height = 6
)

Let’s perform the actual clustering using K=6:

# K-means (k = 6) on the scaled expression matrix (first column, transcript_ID, dropped)
set.seed(20)
kClust.any.trtlive.6 <- kmeans(
  x = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 6,
  nstart = 1000,
  iter.max = 20
)
kClusters.any.trtlive.6 <- kClust.any.trtlive.6[["cluster"]]
# number of genes per cluster, with the cluster id stored as character
cluster.any.trtlive.6.num <- count(tibble(cluster = kClusters.any.trtlive.6), cluster)
cluster.any.trtlive.6.num <- mutate(cluster.any.trtlive.6.num, cluster = as.character(cluster))
cluster.any.trtlive.6.num

Now we can calculate the cluster ‘cores’ aka centroids: # find centroid in cluster

# one centroid column per cluster label; rows are the sample columns of the input
kClustcentroids.any.trtlive.6 <- sapply(
  X = levels(factor(kClusters.any.trtlive.6)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  clusters = kClusters.any.trtlive.6
)
head(kClustcentroids.any.trtlive.6)
##                                  1          2          3           4
## 1a1_q_002_S1_R1_001     0.65133082 -0.3158853 -0.8226438 -0.37371653
## 1a3_q_004_S3_R1_001    -0.08768173 -0.5162655 -0.1554494  0.63258565
## 1a7_q_007_d8_S7_R1_001 -0.06407782 -0.0306091 -0.2095253  0.68120007
## 1a8_q_008_d8_S8_R1_001  0.31357964 -0.5018963 -0.3659202  0.54455777
## 1c6_q_028_S22_R1_001    0.07730847 -0.4843704  0.1434211 -0.03188877
## 1d3_q_038_S27_R1_001   -0.08124684 -0.1882003 -0.2357184  0.49835252
##                                  5           6
## 1a1_q_002_S1_R1_001    -0.05820563 -0.23794070
## 1a3_q_004_S3_R1_001    -0.22758805 -0.22464750
## 1a7_q_007_d8_S7_R1_001 -0.07040824 -0.03305671
## 1a8_q_008_d8_S8_R1_001 -0.16841988 -0.30044683
## 1c6_q_028_S22_R1_001   -0.12421671 -0.15052035
## 1d3_q_038_S27_R1_001   -0.14962567 -0.05877720

Plotting the centroids to see how they behave: tidyverse version

# Long-format centroid table: one row per sample x cluster, annotated with the
# sample description and the cluster size.
data.sample <- kClustcentroids.any.trtlive.6 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = cluster, value = value, -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.6.num, by = "cluster") %>%
  # facet label of the form "<cluster> \n(<number of members>)"
  mutate(cluster.n = glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value per group x cluster, used for the line layer.
# The per-group mean is joined to a lookup holding exactly one row per
# group.cluster. The lookup is de-duplicated with distinct() rather than by
# slicing with a hard-coded row count: the previous `1:768` was copied from the
# eight-cluster chunk and did not match the number of rows produced here.
  data.group <- data.sample %>%
    unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
    group_by(group.cluster) %>%
    summarize(group.cluster.mean = mean(value)) %>%
    inner_join(
      data.sample %>%
        unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
        dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster") %>%
        # keep the first row of each group.cluster, as the old slice() did
        dplyr::distinct(group.cluster, .keep_all = TRUE),
      by = "group.cluster"
    )
# plot: per-sample centroid values as points, group means as lines,
# faceted by cluster (rows) and sampling day (columns).
p6.any.trtlive <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): six clusters", color = "Cluster", y = "scaled expression level")
p6.any.trtlive

# Arguments are named explicitly instead of relying on `file` partially
# matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.6clusters.png",
  plot = p6.any.trtlive,
  width = 11,
  height = 6
)

Let’s perform the actual clustering using K=8:

# Fixed seed so the random starts (and hence the cluster labels) are reproducible.
set.seed(20)
# K-means with 8 centers on every column except the first
# (presumably the transcript ID column -- confirm against the spread table).
kClust.any.trtlive.8 <- kmeans(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 8,
  iter.max = 20,
  nstart = 1000
)
kClusters.any.trtlive.8 <- kClust.any.trtlive.8[["cluster"]]
# Cluster sizes; the cluster id is stored as character so it can be joined
# against the character cluster labels used when plotting.
cluster.any.trtlive.8.num <- tibble(cluster = kClusters.any.trtlive.8) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.any.trtlive.8.num

Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster

# Centroid of one cluster.
#
# i        cluster label to select (compared to `clusters` with ==)
# dat      matrix or data frame, one row per clustered item
# clusters vector of cluster labels, one per row of `dat`
#
# Returns the column means of the rows of `dat` assigned to cluster `i`.
clust.centroid <- function(i, dat, clusters) {
  ind <- (clusters == i)
  # drop = FALSE keeps the subset two-dimensional, so a cluster with a single
  # member still works when `dat` is a matrix (colMeans() rejects plain vectors).
  colMeans(dat[ind, , drop = FALSE])
}

# One centroid (vector of per-column means) per cluster id; sapply() binds them
# into a matrix with one column per cluster.
kClustcentroids.any.trtlive.8 <- sapply(
  X = levels(factor(kClusters.any.trtlive.8)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  clusters = kClusters.any.trtlive.8
)
head(kClustcentroids.any.trtlive.8)
##                                  1           2           3          4
## 1a1_q_002_S1_R1_001     0.66774332 -0.32765717 -0.04664319 -0.3967766
## 1a3_q_004_S3_R1_001    -0.16465433 -0.35598558 -0.23130395 -0.6495724
## 1a7_q_007_d8_S7_R1_001 -0.09954455  0.07985282 -0.05066739 -0.2320192
## 1a8_q_008_d8_S8_R1_001  0.29726762 -0.41141910 -0.16242202 -0.6164058
## 1c6_q_028_S22_R1_001    0.05698738 -0.44714663 -0.12673854 -0.4659240
## 1d3_q_038_S27_R1_001   -0.06494311 -0.01126279 -0.13568017 -0.4660710
##                                  5          6           7            8
## 1a1_q_002_S1_R1_001    -0.15395817 -0.9249212 -0.35478101 -0.584426385
## 1a3_q_004_S3_R1_001    -0.27426807 -0.4472171  0.58951253  0.598921297
## 1a7_q_007_d8_S7_R1_001 -0.01876720 -0.3831983  0.72398958  0.051023263
## 1a8_q_008_d8_S8_R1_001 -0.28330120 -0.4472883  0.55642867 -0.032406247
## 1c6_q_028_S22_R1_001   -0.11632418  0.3594808 -0.04093214  0.001411183
## 1d3_q_038_S27_R1_001   -0.01812421 -0.2720918  0.51892604 -0.120575363

Plotting the centroids to see how they behave: tidyverse version

# Long-format centroid table: one row per sample x cluster, annotated with the
# sample description and the cluster size.
data.sample <- kClustcentroids.any.trtlive.8 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = cluster, value = value, -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.8.num, by = "cluster") %>%
  # facet label of the form "<cluster> \n(<number of members>)"
  mutate(cluster.n = glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value per group x cluster, used for the line layer.
# The per-group mean is joined to a lookup holding exactly one row per
# group.cluster. The lookup is de-duplicated with distinct() so the code no
# longer depends on a hard-coded row count (previously 1:768).
  data.group <- data.sample %>%
    unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
    group_by(group.cluster) %>%
    summarize(group.cluster.mean = mean(value)) %>%
    inner_join(
      data.sample %>%
        unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
        dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster") %>%
        # keep the first row of each group.cluster, as the old slice() did
        dplyr::distinct(group.cluster, .keep_all = TRUE),
      by = "group.cluster"
    )
# plot: per-sample centroid values as points, group means as lines,
# faceted by cluster (rows) and sampling day (columns).
p8.any.trtlive <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters", color = "Cluster", y = "scaled expression level")
p8.any.trtlive

# Arguments are named explicitly instead of relying on `file` partially
# matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.8clusters.png",
  plot = p8.any.trtlive,
  width = 11,
  height = 8
)

Let’s perform the actual clustering using K=15:

# Fixed seed so the random starts (and hence the cluster labels) are reproducible.
set.seed(20)
# K-means with 15 centers on every column except the first
# (presumably the transcript ID column -- confirm against the spread table).
kClust.any.trtlive.15 <- kmeans(
  cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  centers = 15,
  iter.max = 20,
  nstart = 1000
)
kClusters.any.trtlive.15 <- kClust.any.trtlive.15[["cluster"]]
# Cluster sizes; the cluster id is stored as character so it can be joined
# against the character cluster labels used when plotting.
cluster.any.trtlive.15.num <- tibble(cluster = kClusters.any.trtlive.15) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.any.trtlive.15.num

Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i

# Centroid of one cluster.
#
# i        cluster label to select (compared to `clusters` with ==)
# dat      matrix or data frame, one row per clustered item
# clusters vector of cluster labels, one per row of `dat`
#
# Returns the column means of the rows of `dat` assigned to cluster `i`.
clust.centroid <- function(i, dat, clusters) {
  ind <- (clusters == i)
  # drop = FALSE keeps the subset two-dimensional, so a cluster with a single
  # member still works when `dat` is a matrix (colMeans() rejects plain vectors).
  colMeans(dat[ind, , drop = FALSE])
}

# One centroid (vector of per-column means) per cluster id; sapply() binds them
# into a matrix with one column per cluster.
kClustcentroids.any.trtlive.15 <- sapply(
  X = levels(factor(kClusters.any.trtlive.15)),
  FUN = clust.centroid,
  dat = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[, -1],
  clusters = kClusters.any.trtlive.15
)
head(kClustcentroids.any.trtlive.15)
##                                 1            2          3          4
## 1a1_q_002_S1_R1_001     0.5363413 -0.368342751 -0.1930865 -0.9943538
## 1a3_q_004_S3_R1_001    -0.7294333 -0.364642600 -0.7338475 -0.4490430
## 1a7_q_007_d8_S7_R1_001 -0.3427135 -0.002591793 -0.1843309 -0.4124467
## 1a8_q_008_d8_S8_R1_001 -0.2509873 -0.479632975 -0.5871281 -0.4363606
## 1c6_q_028_S22_R1_001   -0.2064792 -0.519653925 -0.4785148  0.4240465
## 1d3_q_038_S27_R1_001   -0.4523775 -0.096021258 -0.3338139 -0.2100288
##                                  5            6          7           8
## 1a1_q_002_S1_R1_001     0.53906726  0.677494289 -0.2143971 -0.53950313
## 1a3_q_004_S3_R1_001     0.04061909  0.087791367 -0.3582435  0.18574802
## 1a7_q_007_d8_S7_R1_001  0.40465292 -0.031086513 -0.1696025  0.02199784
## 1a8_q_008_d8_S8_R1_001  0.40058289  0.358359368 -0.3610603  0.14468767
## 1c6_q_028_S22_R1_001   -0.20400698  0.146627590 -0.1992854  0.49896429
## 1d3_q_038_S27_R1_001    0.11604338 -0.009315873 -0.2560809 -0.17461624
##                                  9          10          11         12
## 1a1_q_002_S1_R1_001    -0.09328228 -0.05176534 -0.34883557 -0.5686172
## 1a3_q_004_S3_R1_001    -0.11607911 -0.12476358  0.94170519  0.6524345
## 1a7_q_007_d8_S7_R1_001 -0.03715975  0.14627723 -0.06335732  0.7831266
## 1a8_q_008_d8_S8_R1_001 -0.08119719 -0.14666264  0.10727360  0.4921936
## 1c6_q_028_S22_R1_001   -0.06907810  0.04886612  0.18259940 -0.2771016
## 1d3_q_038_S27_R1_001   -0.04874525  0.48051250 -0.19548676  0.7062537
##                                 13           14         15
## 1a1_q_002_S1_R1_001    -0.61805695 -0.317859857 -0.5165267
## 1a3_q_004_S3_R1_001     0.06162628 -0.315313752 -0.6740801
## 1a7_q_007_d8_S7_R1_001  0.14337203  0.066701371 -0.0568714
## 1a8_q_008_d8_S8_R1_001 -0.16162715 -0.378009042 -0.6538674
## 1c6_q_028_S22_R1_001   -0.16897390 -0.341682014 -0.4940442
## 1d3_q_038_S27_R1_001   -0.26367077  0.002871791 -0.4328578

Plotting the centroids to see how they behave: tidyverse version

# Long-format centroid table: one row per sample x cluster, annotated with the
# sample description and the cluster size.
data.sample <- kClustcentroids.any.trtlive.15 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = cluster, value = value, -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.any.trtlive.15.num, by = "cluster") %>%
  # facet label of the form "<cluster> \n(<number of members>)"
  mutate(cluster.n = glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n))
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value per group x cluster, used for the line layer.
# The per-group mean is joined to a lookup holding exactly one row per
# group.cluster. The lookup is de-duplicated with distinct() so the code no
# longer depends on a hard-coded row count (previously 1:1440).
  data.group <- data.sample %>%
    unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
    group_by(group.cluster) %>%
    summarize(group.cluster.mean = mean(value)) %>%
    inner_join(
      data.sample %>%
        unite("group.cluster", c("group", "cluster"), remove = FALSE) %>%
        dplyr::select("group.cluster", "sampling_day", "soil_trt", "cluster.n", "cluster") %>%
        # keep the first row of each group.cluster, as the old slice() did
        dplyr::distinct(group.cluster, .keep_all = TRUE),
      by = "group.cluster"
    )
# plot: per-sample centroid values as points, group means as lines,
# faceted by cluster (rows) and sampling day (columns).
p15.any.trtlive <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster))
) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = group.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_day) +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(title = "K-means clustering of two afternoon DEGs (live vs dead soil): fifteen clusters", color = "Cluster", y = "scaled expression level")
p15.any.trtlive

# Arguments are named explicitly instead of relying on `file` partially
# matching ggsave()'s `filename` argument.
ggsave(
  filename = "../output/Twoafternoon.any.trtlive.DEG.Kmean.15clusters.png",
  plot = p15.any.trtlive,
  width = 11,
  height = 15
)

GO ORA of each cluster

# 8 Kmeans cluster (my way using enframe, which I am not satisfied)
# Split the transcript IDs by their K=8 cluster and run the GO ORA helper on
# each cluster's gene list; `temp` is a list with one result per cluster.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.8
) %>%
  split(.$cluster) %>%
  map(function(x) {
    GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist = x$transcript_ID)
  })
# convert list to data.frame
# The output file is passed positionally: `path=` is deprecated in newer readr
# (renamed to `file=`), and the positional form works with both.
# NOTE(review): the file name says "trtsoil" while the other outputs in this
# section use "trtlive" -- confirm this is intended.
temp %>%
  enframe(name = "cluster") %>%
  unnest(value) %>%
  write_csv("../output/twoafternoon.any.trtsoil.DEG.Kmeans.8cluster.csv")

Julin’s method

# 5 Kmeans cluster
# Nest the transcript IDs per cluster, then run the GO ORA helper on each
# nested ID column; results are stored in the list-column `GO_result`.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.5
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(ids) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(ids))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            3.003137e-29                1.0000000         64
## 638  GO:0006952            1.908550e-16                1.0000000         62
## 1174 GO:0010120            2.096043e-15                1.0000000         12
## 2750 GO:0050832            4.746634e-15                1.0000000         34
## 423  GO:0006468            2.673140e-14                1.0000000         73
## 890  GO:0009611            8.202025e-13                1.0000000         31
## 894  GO:0009617            4.432493e-12                1.0000000         23
## 3708 GO:2000022            2.023093e-11                1.0000000         14
## 1001 GO:0009816            8.965637e-11                1.0000000         15
## 1210 GO:0010200            2.735995e-10                1.0000000         21
## 1166 GO:0010112            8.973684e-10                1.0000000          8
## 185  GO:0002229            2.263061e-09                1.0000000         16
## 1857 GO:0031347            6.329428e-09                1.0000000         12
## 973  GO:0009753            1.218529e-07                1.0000000         20
## 853  GO:0009407            2.270165e-07                1.0000000          8
## 557  GO:0006749            5.668507e-07                1.0000000          9
## 899  GO:0009626            6.276177e-07                0.9999999         13
## 3438 GO:1900056            1.332557e-06                0.9999999          6
## 1183 GO:0010150            1.511624e-06                0.9999997         15
## 2301 GO:0043069            2.769559e-06                0.9999999          6
## 3442 GO:1900067            3.220011e-06                1.0000000          3
## 972  GO:0009751            5.643181e-06                0.9999986         17
## 3465 GO:1900457            6.387716e-06                0.9999999          4
## 646  GO:0006979            7.257172e-06                0.9999979         21
## 1858 GO:0031348            1.300314e-05                0.9999986          8
## 1205 GO:0010193            1.305263e-05                0.9999989          7
## 855  GO:0009409            1.367625e-05                0.9999954         25
## 3007 GO:0070370            1.432509e-05                0.9999994          5
## 900  GO:0009627            1.760788e-05                0.9999977          9
## 944  GO:0009697            2.460612e-05                0.9999989          5
## 2917 GO:0055114            2.523534e-05                0.9999874         53
## 2040 GO:0034087            2.900227e-05                0.9999993          4
## 3023 GO:0070550            2.900227e-05                0.9999993          4
## 3075 GO:0071169            2.900227e-05                0.9999993          4
## 3165 GO:0071733            2.900227e-05                0.9999993          4
## 3657 GO:1905406            2.900227e-05                0.9999993          4
## 3685 GO:1990414            2.900227e-05                0.9999993          4
## 942  GO:0009695            2.924173e-05                0.9999972          7
## 3322 GO:0090333            4.915444e-05                0.9999960          6
## 921  GO:0009651            6.652211e-05                0.9999716         32
## 1002 GO:0009817            8.221461e-05                0.9999885          8
## 1163 GO:0010106            8.638171e-05                0.9999967          4
## 1737 GO:0019761            9.623650e-05                0.9999886          7
## 558  GO:0006750            9.657666e-05                0.9999987          3
## 1035 GO:0009867            1.114810e-04                0.9999811          9
## 932  GO:0009682            1.469952e-04                0.9999894          5
## 898  GO:0009625            1.473365e-04                0.9999811          7
## 975  GO:0009759            1.657197e-04                0.9999925          4
## 3286 GO:0080185            1.670531e-04                0.9999969          3
## 933  GO:0009684            1.710238e-04                0.9999924          4
## 2794 GO:0051258            1.756902e-04                0.9999972          3
## 32   GO:0000162            2.093468e-04                0.9999836          5
## 485  GO:0006569            2.122848e-04                0.9999960          3
## 619  GO:0006887            2.237554e-04                0.9999637          8
## 895  GO:0009620            2.420533e-04                0.9999491         10
## 687  GO:0007076            2.592800e-04                0.9999872          4
## 2916 GO:0055091            2.727204e-04                1.0000000          2
## 3006 GO:0070328            2.727204e-04                1.0000000          2
## 2889 GO:0052544            2.788945e-04                0.9999770          5
## 1031 GO:0009863            2.898328e-04                0.9999755          5
## 3716 GO:2000031            3.016855e-04                0.9999742          5
## 2940 GO:0060548            3.416445e-04                0.9999815          4
## 1917 GO:0032260            3.874504e-04                0.9999985          2
## 3251 GO:0080142            4.220034e-04                0.9999756          4
## 642  GO:0006970            4.414010e-04                0.9998817         12
## 2582 GO:0046777            4.464102e-04                0.9998467         17
## 187  GO:0002237            6.107270e-04                0.9999005          7
## 1161 GO:0010104            7.951298e-04                0.9999696          3
## 2495 GO:0045927            8.705170e-04                0.9999667          3
##      numInCat
## 2253      726
## 638      1165
## 1174       27
## 2750      469
## 423      1484
## 890       419
## 894       241
## 3708       89
## 1001       97
## 1210      286
## 1166       22
## 185       126
## 1857       95
## 973       338
## 853        67
## 557        85
## 899       140
## 3438       26
## 1183      219
## 2301       30
## 3442        3
## 972       347
## 3465       10
## 646       502
## 1858       64
## 1205       54
## 855       696
## 3007       21
## 900       104
## 944        21
## 2917     1923
## 2040       10
## 3023       10
## 3075       10
## 3165       10
## 3657       10
## 3685       10
## 942        47
## 3322       40
## 921      1045
## 1002      100
## 1163       19
## 1737       69
## 558         6
## 1035      126
## 932        33
## 898        84
## 975        19
## 3286        8
## 933        17
## 2794        6
## 32         35
## 485         7
## 619        96
## 895       150
## 687        16
## 2916        2
## 3006        2
## 2889       31
## 1031       38
## 3716       38
## 2940       18
## 1917        3
## 3251       19
## 642       252
## 2582      363
## 187        88
## 1161       17
## 2495       14
##                                                                                                         term
## 2253                                                                           defense response to bacterium
## 638                                                                                         defense response
## 1174                                                                          camalexin biosynthetic process
## 2750                                                                              defense response to fungus
## 423                                                                                  protein phosphorylation
## 890                                                                                     response to wounding
## 894                                                                                    response to bacterium
## 3708                                                  regulation of jasmonic acid mediated signaling pathway
## 1001                                                 defense response to bacterium, incompatible interaction
## 1210                                                                                      response to chitin
## 1166                                                              regulation of systemic acquired resistance
## 185                                                                            defense response to oomycetes
## 1857                                                                          regulation of defense response
## 973                                                                                response to jasmonic acid
## 853                                                                                  toxin catabolic process
## 557                                                                            glutathione metabolic process
## 899                                                                       plant-type hypersensitive response
## 3438                                                                  negative regulation of leaf senescence
## 1183                                                                                         leaf senescence
## 2301                                                            negative regulation of programmed cell death
## 3442                                                          regulation of cellular response to alkaline pH
## 972                                                                               response to salicylic acid
## 3465                                                regulation of brassinosteroid mediated signaling pathway
## 646                                                                             response to oxidative stress
## 1858                                                                 negative regulation of defense response
## 1205                                                                                       response to ozone
## 855                                                                                         response to cold
## 3007                                                                               cellular heat acclimation
## 900                                                                             systemic acquired resistance
## 944                                                                      salicylic acid biosynthetic process
## 2917                                                                             oxidation-reduction process
## 2040                                                      establishment of mitotic sister chromatid cohesion
## 3023                                                                                       rDNA condensation
## 3075                                                      establishment of protein localization to chromatin
## 3165                                                 transcriptional activation by promoter-enhancer looping
## 3657                                                          positive regulation of mitotic cohesin loading
## 3685                               replication-born double-strand break repair via sister chromatid exchange
## 942                                                                       jasmonic acid biosynthetic process
## 3322                                                                          regulation of stomatal closure
## 921                                                                                  response to salt stress
## 1002                                                    defense response to fungus, incompatible interaction
## 1163                                                                cellular response to iron ion starvation
## 1737                                                                      glucosinolate biosynthetic process
## 558                                                                         glutathione biosynthetic process
## 1035                                                                jasmonic acid mediated signaling pathway
## 932                                                                              induced systemic resistance
## 898                                                                                       response to insect
## 975                                                                indole glucosinolate biosynthetic process
## 3286                                        effector dependent induction by symbiont of host immune response
## 933                                                                   indoleacetic acid biosynthetic process
## 2794                                                                                  protein polymerization
## 32                                                                           tryptophan biosynthetic process
## 485                                                                             tryptophan catabolic process
## 619                                                                                               exocytosis
## 895                                                                                       response to fungus
## 687                                                                          mitotic chromosome condensation
## 2916                                                                                phospholipid homeostasis
## 3006                                                                                triglyceride homeostasis
## 2889                                                     defense response by callose deposition in cell wall
## 1031                                                               salicylic acid mediated signaling pathway
## 3716                                                 regulation of salicylic acid mediated signaling pathway
## 2940                                                                       negative regulation of cell death
## 1917 response to jasmonic acid stimulus involved in jasmonic acid and ethylene-dependent systemic resistance
## 3251                                                       regulation of salicylic acid biosynthetic process
## 642                                                                               response to osmotic stress
## 2582                                                                             protein autophosphorylation
## 187                                                                 response to molecule of bacterial origin
## 1161                                                      regulation of ethylene-activated signaling pathway
## 2495                                                                           positive regulation of growth
##      ontology over_represented_padjust
## 2253       BP             1.137889e-25
## 638        BP             3.615748e-13
## 1174       BP             2.647302e-12
## 2750       BP             4.496249e-12
## 423        BP             2.025706e-11
## 890        BP             5.179579e-10
## 894        BP             2.399245e-09
## 3708       BP             9.581874e-09
## 1001       BP             3.774533e-08
## 1210       BP             1.036668e-07
## 1166       BP             3.091026e-07
## 185        BP             7.145614e-07
## 1857       BP             1.844785e-06
## 973        BP             3.297861e-05
## 853        BP             5.734436e-05
## 557        BP             1.342373e-04
## 899        BP             1.398849e-04
## 3438       BP             2.805032e-04
## 1183       BP             3.014497e-04
## 2301       BP             5.246930e-04
## 3442       BP             5.809819e-04
## 972        BP             9.719097e-04
## 3465       BP             1.052307e-03
## 646        BP             1.145726e-03
## 1858       BP             1.902170e-03
## 1205       BP             1.902170e-03
## 855        BP             1.919234e-03
## 3007       BP             1.938492e-03
## 900        BP             2.300560e-03
## 944        BP             2.915709e-03
## 2917       BP             2.915709e-03
## 2040       BP             2.915709e-03
## 3023       BP             2.915709e-03
## 3075       BP             2.915709e-03
## 3165       BP             2.915709e-03
## 3657       BP             2.915709e-03
## 3685       BP             2.915709e-03
## 942        BP             2.915709e-03
## 3322       BP             4.775543e-03
## 921        BP             6.301307e-03
## 1002       BP             7.597833e-03
## 1163       BP             7.792864e-03
## 1737       BP             8.316567e-03
## 558        BP             8.316567e-03
## 1035       BP             9.386699e-03
## 932        BP             1.187783e-02
## 898        BP             1.187783e-02
## 975        BP             1.291763e-02
## 3286       BP             1.291763e-02
## 933        BP             1.296019e-02
## 2794       BP             1.305275e-02
## 32         BP             1.517636e-02
## 485        BP             1.517636e-02
## 619        BP             1.570017e-02
## 895        BP             1.667527e-02
## 687        BP             1.754307e-02
## 2916       BP             1.781616e-02
## 3006       BP             1.781616e-02
## 2889       BP             1.791070e-02
## 1031       BP             1.830294e-02
## 3716       BP             1.873912e-02
## 2940       BP             2.087889e-02
## 1917       BP             2.330237e-02
## 3251       BP             2.498392e-02
## 642        BP             2.562800e-02
## 2582       BP             2.562800e-02
## 187        BP             3.453798e-02
## 1161       BP             4.430510e-02
## 2495       BP             4.780274e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            8.680462e-11                1.0000000          9
## 972  GO:0009751            5.938723e-08                1.0000000         15
## 515  GO:0006631            5.340014e-07                1.0000000          8
## 514  GO:0006629            4.303455e-06                0.9999994         11
## 1181 GO:0010143            6.172288e-06                0.9999998          5
## 1695 GO:0019441            1.951088e-05                0.9999998          3
## 2917 GO:0055114            4.354688e-05                0.9999818         33
## 967  GO:0009744            7.490509e-05                0.9999913          7
##      numInCat                                       term ontology
## 1291       41               suberin biosynthetic process       BP
## 972       347                 response to salicylic acid       BP
## 515        85               fatty acid metabolic process       BP
## 514       220                    lipid metabolic process       BP
## 1181       29                 cutin biosynthetic process       BP
## 1695        7 tryptophan catabolic process to kynurenine       BP
## 2917     1923                oxidation-reduction process       BP
## 967       136                        response to sucrose       BP
##      over_represented_padjust
## 1291             3.289027e-07
## 972              1.125091e-04
## 515              6.744437e-04
## 514              4.076447e-03
## 1181             4.677360e-03
## 1695             1.232112e-02
## 2917             2.357131e-02
## 967              3.547692e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732            8.887227e-12                1.0000000         13
## 1210 GO:0010200            1.371146e-11                1.0000000         26
## 638  GO:0006952            4.650995e-11                1.0000000         57
## 3094 GO:0071281            2.322895e-10                1.0000000         14
## 853  GO:0009407            9.886978e-08                1.0000000         10
## 3114 GO:0071369            1.468447e-07                1.0000000         10
## 3708 GO:2000022            6.386429e-07                0.9999999         11
## 359  GO:0006355            1.025051e-06                0.9999996         90
## 557  GO:0006749            1.661546e-06                0.9999998         10
## 3125 GO:0071456            2.270396e-06                0.9999999          7
## 1166 GO:0010112            2.374497e-06                0.9999999          6
## 890  GO:0009611            4.880059e-06                0.9999985         23
## 646  GO:0006979            1.566004e-05                0.9999948         24
## 1039 GO:0009873            2.803104e-05                0.9999917         19
## 1919 GO:0032268            4.851525e-05                0.9999996          3
## 1857 GO:0031347            5.033797e-05                0.9999924          9
## 2253 GO:0042742            5.896546e-05                0.9999758         30
## 973  GO:0009753            6.047415e-05                0.9999818         18
## 2525 GO:0046256            7.775999e-05                0.9999991          3
## 2750 GO:0050832            1.107468e-04                0.9999611         21
## 914  GO:0009644            1.397202e-04                0.9999757          9
## 475  GO:0006559            1.521539e-04                0.9999892          5
## 1162 GO:0010105            2.563072e-04                0.9999794          5
## 2907 GO:0055072            2.953109e-04                0.9999579          7
##      numInCat                                                        term
## 3164       52                           cellular response to nitric oxide
## 1210      286                                          response to chitin
## 638      1165                                            defense response
## 3094       77                               cellular response to iron ion
## 853        67                                     toxin catabolic process
## 3114       62                      cellular response to ethylene stimulus
## 3708       89      regulation of jasmonic acid mediated signaling pathway
## 359      2992                  regulation of transcription, DNA-templated
## 557        85                               glutathione metabolic process
## 3125       35                                cellular response to hypoxia
## 1166       22                  regulation of systemic acquired resistance
## 890       419                                        response to wounding
## 646       502                                response to oxidative stress
## 1039      364                        ethylene-activated signaling pathway
## 1919        5            regulation of cellular protein metabolic process
## 1857       95                              regulation of defense response
## 2253      726                               defense response to bacterium
## 973       338                                   response to jasmonic acid
## 2525        6                     2,4,6-trinitrotoluene catabolic process
## 2750      469                                  defense response to fungus
## 914       109                            response to high light intensity
## 475        27                           L-phenylalanine catabolic process
## 1162       29 negative regulation of ethylene-activated signaling pathway
## 2907       69                                        iron ion homeostasis
##      ontology over_represented_padjust
## 3164       BP             2.597635e-08
## 1210       BP             2.597635e-08
## 638        BP             5.874207e-08
## 3094       BP             2.200362e-07
## 853        BP             7.492352e-05
## 3114       BP             9.273245e-05
## 3708       BP             3.456883e-04
## 359        BP             4.854897e-04
## 557        BP             6.995107e-04
## 3125       BP             8.179061e-04
## 1166       BP             8.179061e-04
## 890        BP             1.540879e-03
## 646        BP             4.564300e-03
## 1039       BP             7.586400e-03
## 1919       BP             1.192066e-02
## 1857       BP             1.192066e-02
## 2253       BP             1.272981e-02
## 973        BP             1.272981e-02
## 2525       BP             1.550698e-02
## 2750       BP             2.098098e-02
## 914        BP             2.520952e-02
## 475        BP             2.620504e-02
## 1162       BP             4.222383e-02
## 2907       BP             4.662220e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

# Flatten the per-cluster GO results (list-column `GO_result`) into one long
# table with unnest() and save it as CSV.
# The destination is passed positionally: the `path =` argument of write_csv()
# was deprecated in readr 1.4.0 (renamed to `file =`), and the positional form
# works with both old and new readr versions.
# NOTE(review): the file name says "trtsoil" while the clustered genes come from
# the "trtlive" DEG set -- confirm the intended name before renaming anything.
temp %>%
  unnest(GO_result) %>%
  write_csv(file.path("..", "output", "twoafternoon.any.trtsoil.DEG.Kmeans.5cluster.csv"))
# K-means with 6 clusters: GO over-representation analysis per cluster.
# Pair every transcript with its cluster label, collapse the transcript IDs of
# each cluster into a one-column nested tibble (`transcripts`), then call
# GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA() once per cluster and keep what it
# returns in the list-column `GO_result`.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.6
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # pull() without a column name extracts the last column; the nested
      # tibble holds only transcript_ID, so this yields the ID vector.
      function(cluster.transcripts) {
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster.transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1462 GO:0015760            6.666328e-07                1.0000000          3
## 1449 GO:0015714            2.310538e-06                1.0000000          3
## 2143 GO:0035436            3.733924e-06                1.0000000          3
## 1448 GO:0015713            7.917811e-06                0.9999999          3
## 359  GO:0006355            9.951440e-06                0.9999967         26
##      numInCat                                       term ontology
## 1462        5              glucose-6-phosphate transport       BP
## 1449        7              phosphoenolpyruvate transport       BP
## 2143        8   triose phosphate transmembrane transport       BP
## 1448       10   phosphoglycerate transmembrane transport       BP
## 359      2992 regulation of transcription, DNA-templated       BP
##      over_represented_padjust
## 1462              0.002525872
## 1449              0.004377314
## 2143              0.004715947
## 1448              0.007500146
## 359               0.007541201
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120            4.637858e-14                1.0000000         11
## 890  GO:0009611            8.865680e-13                1.0000000         29
## 2253 GO:0042742            2.390883e-12                1.0000000         38
## 638  GO:0006952            4.243378e-12                1.0000000         49
## 1166 GO:0010112            6.196936e-12                1.0000000          9
## 1210 GO:0010200            5.044516e-09                1.0000000         19
## 853  GO:0009407            2.034535e-08                1.0000000          9
## 1205 GO:0010193            3.190937e-08                1.0000000          9
## 557  GO:0006749            3.934100e-08                1.0000000         10
## 894  GO:0009617            4.632011e-08                1.0000000         17
## 899  GO:0009626            1.434417e-07                1.0000000         13
## 2750 GO:0050832            4.795283e-07                0.9999999         21
## 1001 GO:0009816            1.494838e-06                0.9999998         10
## 1183 GO:0010150            1.985337e-06                0.9999996         14
## 3708 GO:2000022            2.299477e-06                0.9999998          9
## 973  GO:0009753            2.974438e-06                0.9999993         17
## 972  GO:0009751            1.106489e-05                0.9999973         16
## 3438 GO:1900056            2.287173e-05                0.9999989          5
## 1857 GO:0031347            2.543493e-05                0.9999970          8
## 185  GO:0002229            2.874628e-05                0.9999954         10
## 1002 GO:0009817            3.540237e-05                0.9999956          8
## 423  GO:0006468            5.825218e-05                0.9999721         43
## 234  GO:0006032            6.100577e-05                0.9999963          5
## 619  GO:0006887            7.009588e-05                0.9999904          8
## 630  GO:0006904            1.164745e-04                0.9999857          7
## 933  GO:0009684            1.249085e-04                0.9999949          4
## 2889 GO:0052544            1.346118e-04                0.9999905          5
## 485  GO:0006569            1.497008e-04                0.9999975          3
## 2237 GO:0042538            1.591235e-04                0.9999752          8
## 1031 GO:0009863            1.740887e-04                0.9999868          5
## 2986 GO:0062034            2.044306e-04                0.9999960          3
## 2400 GO:0044419            2.250712e-04                0.9999954          3
## 1035 GO:0009867            2.299621e-04                0.9999621          8
## 944  GO:0009697            2.354733e-04                0.9999881          4
## 2859 GO:0051707            2.927450e-04                0.9999577          7
## 900  GO:0009627            3.694357e-04                0.9999443          7
## 895  GO:0009620            3.731450e-04                0.9999250          9
## 2034 GO:0034052            4.029842e-04                0.9999891          3
## 2917 GO:0055114            4.689601e-04                0.9997473         45
## 1737 GO:0019761            5.010736e-04                0.9999355          6
##      numInCat                                                      term
## 1174       27                            camalexin biosynthetic process
## 890       419                                      response to wounding
## 2253      726                             defense response to bacterium
## 638      1165                                          defense response
## 1166       22                regulation of systemic acquired resistance
## 1210      286                                        response to chitin
## 853        67                                   toxin catabolic process
## 1205       54                                         response to ozone
## 557        85                             glutathione metabolic process
## 894       241                                     response to bacterium
## 899       140                        plant-type hypersensitive response
## 2750      469                                defense response to fungus
## 1001       97   defense response to bacterium, incompatible interaction
## 1183      219                                           leaf senescence
## 3708       89    regulation of jasmonic acid mediated signaling pathway
## 973       338                                 response to jasmonic acid
## 972       347                                response to salicylic acid
## 3438       26                    negative regulation of leaf senescence
## 1857       95                            regulation of defense response
## 185       126                             defense response to oomycetes
## 1002      100      defense response to fungus, incompatible interaction
## 423      1484                                   protein phosphorylation
## 234        35                                  chitin catabolic process
## 619        96                                                exocytosis
## 630        73                    vesicle docking involved in exocytosis
## 933        17                    indoleacetic acid biosynthetic process
## 2889       31       defense response by callose deposition in cell wall
## 485         7                              tryptophan catabolic process
## 2237      132                            hyperosmotic salinity response
## 1031       38                 salicylic acid mediated signaling pathway
## 2986        8                     L-pipecolic acid biosynthetic process
## 2400        8                interspecies interaction between organisms
## 1035      126                  jasmonic acid mediated signaling pathway
## 944        21                       salicylic acid biosynthetic process
## 2859       97                                response to other organism
## 900       104                              systemic acquired resistance
## 895       150                                        response to fungus
## 2034       10 positive regulation of plant-type hypersensitive response
## 2917     1923                               oxidation-reduction process
## 1737       69                        glucosinolate biosynthetic process
##      ontology over_represented_padjust
## 1174       BP             1.757284e-10
## 890        BP             1.679603e-09
## 2253       BP             3.019685e-09
## 638        BP             4.019540e-09
## 1166       BP             4.696038e-09
## 1210       BP             3.185612e-06
## 853        BP             1.101265e-05
## 1205       BP             1.511308e-05
## 557        BP             1.656256e-05
## 894        BP             1.755069e-05
## 899        BP             4.940915e-05
## 2750       BP             1.514111e-04
## 1001       BP             4.356877e-04
## 1183       BP             5.373174e-04
## 3708       BP             5.808478e-04
## 973        BP             7.043841e-04
## 972        BP             2.466169e-03
## 3438       BP             4.814499e-03
## 1857       BP             5.072261e-03
## 185        BP             5.445983e-03
## 1002       BP             6.387599e-03
## 423        BP             1.003261e-02
## 234        BP             1.005004e-02
## 619        BP             1.106639e-02
## 630        BP             1.765288e-02
## 933        BP             1.820301e-02
## 2889       BP             1.889052e-02
## 485        BP             2.025772e-02
## 2237       BP             2.079030e-02
## 1031       BP             2.198741e-02
## 2986       BP             2.498670e-02
## 2400       BP             2.624142e-02
## 1035       BP             2.624142e-02
## 944        BP             2.624142e-02
## 2859       BP             3.169174e-02
## 900        BP             3.821207e-02
## 895        BP             3.821207e-02
## 2034       BP             4.018176e-02
## 2917       BP             4.556128e-02
## 1737       BP             4.746420e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            2.555181e-13                1.0000000         10
## 514  GO:0006629            4.812957e-07                0.9999999         11
## 1181 GO:0010143            2.014640e-06                0.9999999          5
## 972  GO:0009751            9.112896e-06                0.9999985         11
## 2917 GO:0055114            9.199414e-06                0.9999967         30
## 1695 GO:0019441            1.005513e-05                0.9999999          3
## 515  GO:0006631            2.199033e-05                0.9999984          6
## 646  GO:0006979            5.372240e-05                0.9999886         12
## 953  GO:0009725            1.054515e-04                0.9999981          3
##      numInCat                                       term ontology
## 1291       41               suberin biosynthetic process       BP
## 514       220                    lipid metabolic process       BP
## 1181       29                 cutin biosynthetic process       BP
## 972       347                 response to salicylic acid       BP
## 2917     1923                oxidation-reduction process       BP
## 1695        7 tryptophan catabolic process to kynurenine       BP
## 515        85               fatty acid metabolic process       BP
## 646       502               response to oxidative stress       BP
## 953        14                        response to hormone       BP
##      over_represented_padjust
## 1291             9.681580e-10
## 514              9.118147e-04
## 1181             2.544490e-03
## 972              6.349817e-03
## 2917             6.349817e-03
## 1695             6.349817e-03
## 515              1.190305e-02
## 646              2.544427e-02
## 953              4.439510e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732            1.250124e-11                1.0000000         12
## 3094 GO:0071281            1.727152e-10                1.0000000         13
## 1210 GO:0010200            2.261492e-10                1.0000000         22
## 359  GO:0006355            1.182921e-07                1.0000000         79
## 3114 GO:0071369            2.654178e-07                1.0000000          9
## 914  GO:0009644            5.003516e-07                0.9999999         11
## 3125 GO:0071456            5.590323e-07                1.0000000          7
## 638  GO:0006952            5.686545e-07                0.9999998         41
## 2239 GO:0042542            2.587505e-06                0.9999996         11
## 646  GO:0006979            5.123726e-06                0.9999985         22
## 1166 GO:0010112            1.563231e-05                0.9999993          5
## 1919 GO:0032268            2.630607e-05                0.9999998          3
## 3708 GO:2000022            4.877420e-05                0.9999937          8
## 854  GO:0009408            5.008081e-05                0.9999871         15
## 1039 GO:0009873            7.742102e-05                0.9999781         16
## 1270 GO:0010286            1.014015e-04                0.9999853          8
## 2253 GO:0042742            1.302990e-04                0.9999489         25
## 2795 GO:0051259            1.328760e-04                0.9999942          4
## 2750 GO:0050832            1.702764e-04                0.9999439         18
## 853  GO:0009407            2.442848e-04                0.9999726          6
## 3117 GO:0071398            2.532181e-04                0.9999944          3
## 170  GO:0001944            2.905167e-04                0.9999752          5
## 2487 GO:0045893            3.025579e-04                0.9998886         20
##      numInCat                                                   term ontology
## 3164       52                      cellular response to nitric oxide       BP
## 3094       77                          cellular response to iron ion       BP
## 1210      286                                     response to chitin       BP
## 359      2992             regulation of transcription, DNA-templated       BP
## 3114       62                 cellular response to ethylene stimulus       BP
## 914       109                       response to high light intensity       BP
## 3125       35                           cellular response to hypoxia       BP
## 638      1165                                       defense response       BP
## 2239      130                          response to hydrogen peroxide       BP
## 646       502                           response to oxidative stress       BP
## 1166       22             regulation of systemic acquired resistance       BP
## 1919        5       regulation of cellular protein metabolic process       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
## 854       305                                       response to heat       BP
## 1039      364                   ethylene-activated signaling pathway       BP
## 1270       99                                       heat acclimation       BP
## 2253      726                          defense response to bacterium       BP
## 2795       22                        protein complex oligomerization       BP
## 2750      469                             defense response to fungus       BP
## 853        67                                toxin catabolic process       BP
## 3117        9                        cellular response to fatty acid       BP
## 170        41                                vasculature development       BP
## 2487      570    positive regulation of transcription, DNA-templated       BP
##      over_represented_padjust
## 3164             4.736720e-08
## 3094             2.856265e-07
## 1210             2.856265e-07
## 359              1.120522e-04
## 3114             2.011336e-04
## 914              2.693290e-04
## 3125             2.693290e-04
## 638              2.693290e-04
## 2239             1.089340e-03
## 646              1.941380e-03
## 1166             5.384620e-03
## 1919             8.306142e-03
## 3708             1.355401e-02
## 854              1.355401e-02
## 1039             1.955655e-02
## 1270             2.401315e-02
## 2253             2.797040e-02
## 2795             2.797040e-02
## 2750             3.395671e-02
## 853              4.568779e-02
## 3117             4.568779e-02
## 170              4.984312e-02
## 2487             4.984312e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            2.066964e-18                1.0000000         37
## 423  GO:0006468            1.525619e-13                1.0000000         50
## 2750 GO:0050832            1.327629e-12                1.0000000         23
## 638  GO:0006952            7.917829e-11                1.0000000         36
## 3708 GO:2000022            9.773011e-10                1.0000000         10
## 1857 GO:0031347            4.231062e-07                1.0000000          8
## 26   GO:0000103            8.320928e-07                1.0000000          5
## 1210 GO:0010200            5.184900e-06                0.9999992         11
## 890  GO:0009611            1.488002e-05                0.9999968         14
## 3007 GO:0070370            2.605475e-05                0.9999993          4
## 1858 GO:0031348            3.037671e-05                0.9999977          6
## 642  GO:0006970            4.109078e-05                0.9999930         10
## 973  GO:0009753            6.199438e-05                0.9999877         11
## 3009 GO:0070417            1.176377e-04                0.9999916          5
## 900  GO:0009627            1.462438e-04                0.9999849          6
## 942  GO:0009695            1.484271e-04                0.9999890          5
## 2040 GO:0034087            1.497554e-04                0.9999971          3
## 3023 GO:0070550            1.497554e-04                0.9999971          3
## 3075 GO:0071169            1.497554e-04                0.9999971          3
## 3165 GO:0071733            1.497554e-04                0.9999971          3
## 3657 GO:1905406            1.497554e-04                0.9999971          3
## 3685 GO:1990414            1.497554e-04                0.9999971          3
## 3442 GO:1900067            1.797488e-04                0.9999995          2
## 894  GO:0009617            2.100138e-04                0.9999607          9
## 2917 GO:0055114            2.219751e-04                0.9999010         31
## 2223 GO:0042344            2.342897e-04                0.9999943          3
##      numInCat
## 2253      726
## 423      1484
## 2750      469
## 638      1165
## 3708       89
## 1857       95
## 26         24
## 1210      286
## 890       419
## 3007       21
## 1858       64
## 642       252
## 973       338
## 3009       54
## 900       104
## 942        47
## 2040       10
## 3023       10
## 3075       10
## 3165       10
## 3657       10
## 3685       10
## 3442        3
## 894       241
## 2917     1923
## 2223       15
##                                                                           term
## 2253                                             defense response to bacterium
## 423                                                    protein phosphorylation
## 2750                                                defense response to fungus
## 638                                                           defense response
## 3708                    regulation of jasmonic acid mediated signaling pathway
## 1857                                            regulation of defense response
## 26                                                        sulfate assimilation
## 1210                                                        response to chitin
## 890                                                       response to wounding
## 3007                                                 cellular heat acclimation
## 1858                                   negative regulation of defense response
## 642                                                 response to osmotic stress
## 973                                                  response to jasmonic acid
## 3009                                                 cellular response to cold
## 900                                               systemic acquired resistance
## 942                                         jasmonic acid biosynthetic process
## 2040                        establishment of mitotic sister chromatid cohesion
## 3023                                                         rDNA condensation
## 3075                        establishment of protein localization to chromatin
## 3165                   transcriptional activation by promoter-enhancer looping
## 3657                            positive regulation of mitotic cohesin loading
## 3685 replication-born double-strand break repair via sister chromatid exchange
## 3442                            regulation of cellular response to alkaline pH
## 894                                                      response to bacterium
## 2917                                               oxidation-reduction process
## 2223                                    indole glucosinolate catabolic process
##      ontology over_represented_padjust
## 2253       BP             7.831726e-15
## 423        BP             2.890285e-10
## 2750       BP             1.676795e-09
## 638        BP             7.500164e-08
## 3708       BP             7.405988e-07
## 1857       BP             2.671916e-04
## 26         BP             4.503999e-04
## 1210       BP             2.455698e-03
## 890        BP             6.264487e-03
## 3007       BP             9.872143e-03
## 1858       BP             1.046340e-02
## 642        BP             1.297441e-02
## 973        BP             1.806898e-02
## 3009       BP             2.579197e-02
## 900        BP             2.579197e-02
## 942        BP             2.579197e-02
## 2040       BP             2.579197e-02
## 3023       BP             2.579197e-02
## 3075       BP             2.579197e-02
## 3165       BP             2.579197e-02
## 3657       BP             2.579197e-02
## 3685       BP             2.579197e-02
## 3442       BP             2.961165e-02
## 894        BP             3.315592e-02
## 2917       BP             3.364255e-02
## 2223       BP             3.414322e-02
# Flatten the per-cluster GO results (list-column `GO_result`) into one long
# table with unnest() and save it as CSV.
# The destination is passed positionally: the `path =` argument of write_csv()
# was deprecated in readr 1.4.0 (renamed to `file =`), and the positional form
# works with both old and new readr versions.
# NOTE(review): the file name says "trtsoil" while the clustered genes come from
# the "trtlive" DEG set -- confirm the intended name before renaming anything.
temp %>%
  unnest(GO_result) %>%
  write_csv(file.path("..", "output", "twoafternoon.any.trtsoil.DEG.Kmeans.6cluster.csv"))
# K-means with 8 clusters: GO over-representation analysis per cluster.
# Pair every transcript with its cluster label, collapse the transcript IDs of
# each cluster into a one-column nested tibble (`transcripts`), then call
# GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA() once per cluster and keep what it
# returns in the list-column `GO_result`.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.8
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # An explicit anonymous function is equivalent to the purrr formula
      # shorthand `~ f(pull(.))`, where `.` stands for the current element.
      # pull() without a column name extracts the last column; the nested
      # tibble holds only transcript_ID, so this yields the ID vector.
      function(cluster.transcripts) {
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster.transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1462 GO:0015760            5.623992e-07                1.0000000          3
## 359  GO:0006355            1.096760e-06                0.9999997         27
## 1449 GO:0015714            1.949991e-06                1.0000000          3
## 2143 GO:0035436            3.151415e-06                1.0000000          3
## 1448 GO:0015713            6.685127e-06                1.0000000          3
##      numInCat                                       term ontology
## 1462        5              glucose-6-phosphate transport       BP
## 359      2992 regulation of transcription, DNA-templated       BP
## 1449        7              phosphoenolpyruvate transport       BP
## 2143        8   triose phosphate transmembrane transport       BP
## 1448       10   phosphoglycerate transmembrane transport       BP
##      over_represented_padjust
## 1462              0.002077812
## 359               0.002077812
## 1449              0.002462839
## 2143              0.002985178
## 1448              0.005065989
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120            3.933902e-14                1.0000000         10
## 2253 GO:0042742            8.640276e-12                1.0000000         30
## 1166 GO:0010112            1.217158e-11                1.0000000          8
## 890  GO:0009611            3.002585e-10                1.0000000         21
## 638  GO:0006952            2.886080e-09                1.0000000         34
## 2750 GO:0050832            8.711643e-08                1.0000000         18
## 557  GO:0006749            2.512131e-07                1.0000000          8
## 853  GO:0009407            3.046529e-07                1.0000000          7
## 1210 GO:0010200            1.093472e-06                0.9999998         13
## 899  GO:0009626            1.204013e-06                0.9999999         10
## 894  GO:0009617            2.637069e-06                0.9999996         12
## 900  GO:0009627            3.095308e-06                0.9999997          8
## 3438 GO:1900056            3.275946e-06                0.9999999          5
## 2237 GO:0042538            9.409826e-06                0.9999990          8
## 1031 GO:0009863            2.617001e-05                0.9999987          5
## 933  GO:0009684            2.626258e-05                0.9999993          4
## 619  GO:0006887            3.813169e-05                0.9999961          7
## 485  GO:0006569            4.543349e-05                0.9999995          3
## 2986 GO:0062034            6.221247e-05                0.9999992          3
## 2400 GO:0044419            6.855688e-05                0.9999991          3
## 898  GO:0009625            8.048379e-05                0.9999926          6
## 3708 GO:2000022            1.235004e-04                0.9999877          6
## 2034 GO:0034052            1.235119e-04                0.9999978          3
## 972  GO:0009751            2.045166e-04                0.9999533         11
## 477  GO:0006561            2.669627e-04                0.9999933          3
## 932  GO:0009682            2.729335e-04                0.9999850          4
## 185  GO:0002229            3.590825e-04                0.9999460          7
## 646  GO:0006979            3.684017e-04                0.9998966         13
## 1001 GO:0009816            3.687152e-04                0.9999547          6
## 32   GO:0000162            3.720090e-04                0.9999777          4
## 2230 GO:0042372            4.057400e-04                0.9999876          3
##      numInCat                                                      term
## 1174       27                            camalexin biosynthetic process
## 2253      726                             defense response to bacterium
## 1166       22                regulation of systemic acquired resistance
## 890       419                                      response to wounding
## 638      1165                                          defense response
## 2750      469                                defense response to fungus
## 557        85                             glutathione metabolic process
## 853        67                                   toxin catabolic process
## 1210      286                                        response to chitin
## 899       140                        plant-type hypersensitive response
## 894       241                                     response to bacterium
## 900       104                              systemic acquired resistance
## 3438       26                    negative regulation of leaf senescence
## 2237      132                            hyperosmotic salinity response
## 1031       38                 salicylic acid mediated signaling pathway
## 933        17                    indoleacetic acid biosynthetic process
## 619        96                                                exocytosis
## 485         7                              tryptophan catabolic process
## 2986        8                     L-pipecolic acid biosynthetic process
## 2400        8                interspecies interaction between organisms
## 898        84                                        response to insect
## 3708       89    regulation of jasmonic acid mediated signaling pathway
## 2034       10 positive regulation of plant-type hypersensitive response
## 972       347                                response to salicylic acid
## 477        13                              proline biosynthetic process
## 932        33                               induced systemic resistance
## 185       126                             defense response to oomycetes
## 646       502                              response to oxidative stress
## 1001       97   defense response to bacterium, incompatible interaction
## 32         35                           tryptophan biosynthetic process
## 2230       18                        phylloquinone biosynthetic process
##      ontology over_represented_padjust
## 1174       BP             1.490555e-10
## 2253       BP             1.537270e-08
## 1166       BP             1.537270e-08
## 890        BP             2.844199e-07
## 638        BP             2.187072e-06
## 2750       BP             5.501403e-05
## 557        BP             1.359781e-04
## 853        BP             1.442912e-04
## 1210       BP             4.562004e-04
## 899        BP             4.562004e-04
## 894        BP             9.083503e-04
## 900        BP             9.548123e-04
## 3438       BP             9.548123e-04
## 2237       BP             2.546702e-03
## 1031       BP             6.219308e-03
## 933        BP             6.219308e-03
## 619        BP             8.498880e-03
## 485        BP             9.563750e-03
## 2986       BP             1.240648e-02
## 2400       BP             1.298810e-02
## 898        BP             1.452157e-02
## 3708       BP             2.034725e-02
## 2034       BP             2.034725e-02
## 972        BP             3.228805e-02
## 477        BP             3.977480e-02
## 932        BP             3.977480e-02
## 185        BP             4.698474e-02
## 646        BP             4.698474e-02
## 1001       BP             4.698474e-02
## 32         BP             4.698474e-02
## 2230       BP             4.959190e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200            1.343043e-10                1.0000000         22
## 3164 GO:0071732            1.889538e-10                1.0000000         11
## 3094 GO:0071281            1.613934e-09                1.0000000         12
## 359  GO:0006355            2.926398e-07                1.0000000         76
## 914  GO:0009644            3.813626e-07                1.0000000         11
## 3125 GO:0071456            4.645253e-07                1.0000000          7
## 3114 GO:0071369            2.507765e-06                0.9999998          8
## 638  GO:0006952            4.345368e-06                0.9999982         38
## 1166 GO:0010112            1.355092e-05                0.9999994          5
## 1039 GO:0009873            1.550994e-05                0.9999960         17
## 170  GO:0001944            2.120295e-05                0.9999985          6
## 853  GO:0009407            2.365588e-05                0.9999978          7
## 1919 GO:0032268            2.430717e-05                0.9999998          3
## 854  GO:0009408            3.627089e-05                0.9999909         15
## 3708 GO:2000022            3.991164e-05                0.9999950          8
## 2239 GO:0042542            8.277225e-05                0.9999865          9
## 1270 GO:0010286            8.336564e-05                0.9999883          8
## 646  GO:0006979            1.072674e-04                0.9999647         19
## 2795 GO:0051259            1.232705e-04                0.9999947          4
## 557  GO:0006749            1.465446e-04                0.9999812          7
## 2253 GO:0042742            2.068113e-04                0.9999181         24
## 3117 GO:0071398            2.323324e-04                0.9999950          3
##      numInCat                                                   term ontology
## 1210      286                                     response to chitin       BP
## 3164       52                      cellular response to nitric oxide       BP
## 3094       77                          cellular response to iron ion       BP
## 359      2992             regulation of transcription, DNA-templated       BP
## 914       109                       response to high light intensity       BP
## 3125       35                           cellular response to hypoxia       BP
## 3114       62                 cellular response to ethylene stimulus       BP
## 638      1165                                       defense response       BP
## 1166       22             regulation of systemic acquired resistance       BP
## 1039      364                   ethylene-activated signaling pathway       BP
## 170        41                                vasculature development       BP
## 853        67                                toxin catabolic process       BP
## 1919        5       regulation of cellular protein metabolic process       BP
## 854       305                                       response to heat       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
## 2239      130                          response to hydrogen peroxide       BP
## 1270       99                                       heat acclimation       BP
## 646       502                           response to oxidative stress       BP
## 2795       22                        protein complex oligomerization       BP
## 557        85                          glutathione metabolic process       BP
## 2253      726                          defense response to bacterium       BP
## 3117        9                        cellular response to fatty acid       BP
##      over_represented_padjust
## 1210             3.579730e-07
## 3164             3.579730e-07
## 3094             2.038399e-06
## 359              2.772030e-04
## 914              2.889966e-04
## 3125             2.933477e-04
## 3114             1.357418e-03
## 638              2.058075e-03
## 1166             5.704937e-03
## 1039             5.876717e-03
## 170              7.084606e-03
## 853              7.084606e-03
## 1919             7.084606e-03
## 854              9.816458e-03
## 3708             1.008168e-02
## 2239             1.858073e-02
## 1270             1.858073e-02
## 646              2.257979e-02
## 2795             2.458274e-02
## 557              2.776287e-02
## 2253             3.731466e-02
## 3117             4.001398e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816            1.135827e-05                0.9999990          7
## 890  GO:0009611            1.193363e-05                0.9999976         13
## 3251 GO:0080142            2.049805e-05                0.9999995          4
## 1205 GO:0010193            3.256854e-05                0.9999982          5
## 187  GO:0002237            5.567136e-05                0.9999952          6
## 638  GO:0006952            6.623245e-05                0.9999771         22
## 1210 GO:0010200            8.592369e-05                0.9999857          9
## 973  GO:0009753            1.046543e-04                0.9999800         10
##      numInCat                                                    term ontology
## 1001       97 defense response to bacterium, incompatible interaction       BP
## 890       419                                    response to wounding       BP
## 3251       19       regulation of salicylic acid biosynthetic process       BP
## 1205       54                                       response to ozone       BP
## 187        88                response to molecule of bacterial origin       BP
## 638      1165                                        defense response       BP
## 1210      286                                      response to chitin       BP
## 973       338                               response to jasmonic acid       BP
##      over_represented_padjust
## 1001               0.02260826
## 890                0.02260826
## 3251               0.02588904
## 1205               0.03085055
## 187                0.04182579
## 638                0.04182579
## 1210               0.04650927
## 973                0.04956691
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            8.757706e-16                1.0000000         30
## 3708 GO:2000022            7.994984e-11                1.0000000         10
## 423  GO:0006468            5.373869e-10                1.0000000         37
## 2750 GO:0050832            2.165212e-09                1.0000000         17
## 638  GO:0006952            8.977942e-09                1.0000000         28
## 1857 GO:0031347            6.083376e-08                1.0000000          8
## 26   GO:0000103            2.373158e-07                1.0000000          5
## 890  GO:0009611            7.356536e-07                0.9999999         14
## 1210 GO:0010200            3.530396e-06                0.9999996         10
## 1174 GO:0010120            2.304231e-05                0.9999993          4
## 642  GO:0006970            3.054236e-05                0.9999956          9
## 3009 GO:0070417            3.406753e-05                0.9999981          5
## 973  GO:0009753            3.498600e-05                0.9999942         10
## 1183 GO:0010150            8.085242e-05                0.9999885          8
## 1858 GO:0031348            9.938846e-05                0.9999931          5
## 3442 GO:1900067            1.076857e-04                0.9999998          2
## 2223 GO:0042344            1.092568e-04                0.9999979          3
## 972  GO:0009751            1.274416e-04                0.9999777          9
## 2917 GO:0055114            1.933095e-04                0.9999217         26
##      numInCat                                                   term ontology
## 2253      726                          defense response to bacterium       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
## 423      1484                                protein phosphorylation       BP
## 2750      469                             defense response to fungus       BP
## 638      1165                                       defense response       BP
## 1857       95                         regulation of defense response       BP
## 26         24                                   sulfate assimilation       BP
## 890       419                                   response to wounding       BP
## 1210      286                                     response to chitin       BP
## 1174       27                         camalexin biosynthetic process       BP
## 642       252                             response to osmotic stress       BP
## 3009       54                              cellular response to cold       BP
## 973       338                              response to jasmonic acid       BP
## 1183      219                                        leaf senescence       BP
## 1858       64                negative regulation of defense response       BP
## 3442        3         regulation of cellular response to alkaline pH       BP
## 2223       15                 indole glucosinolate catabolic process       BP
## 972       347                             response to salicylic acid       BP
## 2917     1923                            oxidation-reduction process       BP
##      over_represented_padjust
## 2253             3.318295e-12
## 3708             1.514650e-07
## 423              6.787196e-07
## 2750             2.050997e-06
## 638              6.803485e-06
## 1857             3.841652e-05
## 26               1.284556e-04
## 890              3.484239e-04
## 1210             1.486297e-03
## 1174             8.730730e-03
## 642              1.019707e-02
## 3009             1.019707e-02
## 973              1.019707e-02
## 1183             2.188213e-02
## 1858             2.435142e-02
## 3442             2.435142e-02
## 2223             2.435142e-02
## 972              2.682646e-02
## 2917             3.854999e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            5.357720e-16                1.0000000         10
## 1181 GO:0010143            9.648414e-08                1.0000000          5
## 514  GO:0006629            1.779627e-07                1.0000000          9
## 2917 GO:0055114            3.330532e-07                0.9999999         23
## 515  GO:0006631            1.472111e-05                0.9999993          5
## 953  GO:0009725            1.731199e-05                0.9999998          3
## 1531 GO:0016042            2.458469e-05                0.9999976          7
## 2136 GO:0035336            4.497929e-05                0.9999994          3
##      numInCat                                        term ontology
## 1291       41                suberin biosynthetic process       BP
## 1181       29                  cutin biosynthetic process       BP
## 514       220                     lipid metabolic process       BP
## 2917     1923                 oxidation-reduction process       BP
## 515        85                fatty acid metabolic process       BP
## 953        14                         response to hormone       BP
## 1531      229                     lipid catabolic process       BP
## 2136       17 long-chain fatty-acyl-CoA metabolic process       BP
##      over_represented_padjust
## 1291             2.030040e-12
## 1181             1.827892e-04
## 514              2.247668e-04
## 2917             3.154847e-04
## 515              1.093252e-02
## 953              1.093252e-02
## 1531             1.330734e-02
## 2136             2.130332e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441            1.046761e-06                1.0000000          3
## 646  GO:0006979            1.940648e-06                0.9999998         10
## 1292 GO:0010350            3.796980e-05                0.9999999          2
## 3096 GO:0071286            3.796980e-05                0.9999999          2
## 3104 GO:0071325            3.796980e-05                0.9999999          2
## 3197 GO:0072709            3.796980e-05                0.9999999          2
## 1838 GO:0031115            6.036162e-05                0.9999999          2
## 736  GO:0007568            8.378585e-05                0.9999965          4
## 1572 GO:0016310            9.069704e-05                0.9999832         10
## 2844 GO:0051592            9.983663e-05                0.9999997          2
## 1840 GO:0031117            1.039244e-04                0.9999996          2
## 3201 GO:0075733            1.069764e-04                0.9999996          2
## 3093 GO:0071280            1.177608e-04                0.9999995          2
## 2162 GO:0035865            1.203759e-04                0.9999995          2
## 1619 GO:0018008            1.219134e-04                0.9999995          2
## 1248 GO:0010248            1.574567e-04                0.9999993          2
## 452  GO:0006520            2.210690e-04                0.9999881          4
##      numInCat
## 1695        7
## 646       502
## 1292        4
## 3096        4
## 3104        4
## 3197        4
## 1838        5
## 736        85
## 1572      685
## 2844        6
## 1840        7
## 3201        7
## 3093        7
## 2162        7
## 1619        6
## 1248        6
## 452        90
##                                                                        term
## 1695                             tryptophan catabolic process to kynurenine
## 646                                            response to oxidative stress
## 1292                              cellular response to magnesium starvation
## 3096                                     cellular response to magnesium ion
## 3104                                 cellular response to mannitol stimulus
## 3197                                          cellular response to sorbitol
## 1838                      negative regulation of microtubule polymerization
## 736                                                                   aging
## 1572                                                        phosphorylation
## 2844                                                response to calcium ion
## 1840                    positive regulation of microtubule depolymerization
## 3201                                       intracellular transport of virus
## 3093                                        cellular response to copper ion
## 2162                                     cellular response to potassium ion
## 1619                           N-terminal peptidyl-glycine N-myristoylation
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 452                                   cellular amino acid metabolic process
##      ontology over_represented_padjust
## 1695       BP              0.003676557
## 646        BP              0.003676557
## 1292       BP              0.023977928
## 3096       BP              0.023977928
## 3104       BP              0.023977928
## 3197       BP              0.023977928
## 1838       BP              0.030795317
## 736        BP              0.030795317
## 1572       BP              0.030795317
## 2844       BP              0.030795317
## 1840       BP              0.030795317
## 3201       BP              0.030795317
## 3093       BP              0.030795317
## 2162       BP              0.030795317
## 1619       BP              0.030795317
## 1248       BP              0.037287711
## 452        BP              0.049272379
# Export the per-cluster GO results as one flat table.
# unnest() expands the GO_result list-column of `temp`, so each row of every
# nested result becomes its own row of a single data frame (the author's
# preferred layout: everything in one data.frame) before it is written to CSV.
# NOTE(review): the file name says "trtsoil" while the objects used in the
# 15-cluster step below are named "trtlive" -- confirm the intended name.
# NOTE(review): `path =` is deprecated in newer readr releases in favour of
# `file =`; kept here to match the readr version this document was knit with.
temp %>%
  unnest(GO_result) %>%
  write_csv(path="../output/twoafternoon.any.trtsoil.DEG.Kmeans.8cluster.csv")
# 15 Kmeans cluster
# Pair each transcript ID with its entry in kClusters.any.trtlive.15
# (presumably the 15-cluster k-means assignment -- verify upstream), collect
# the transcript IDs of each cluster into the list-column `transcripts`, and
# call GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA() once per cluster; whatever it
# returns is stored in the list-column `GO_result`.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID,
  cluster = kClusters.any.trtlive.15
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # each element is a one-column tibble; pull() extracts that column
      # as a plain vector of transcript IDs for the enrichment helper
      function(cluster_transcripts) {
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster_transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 3114 GO:0071369            0.000000e+00                1.0000000          8
## 3164 GO:0071732            0.000000e+00                1.0000000          8
## 3094 GO:0071281            5.887219e-11                1.0000000          8
## 2907 GO:0055072            1.403391e-06                1.0000000          5
## 359  GO:0006355            9.616408e-06                0.9999972         22
## 1195 GO:0010167            1.116999e-05                0.9999997          4
## 613  GO:0006880            1.510212e-05                0.9999998          3
## 1760 GO:0030001            2.151165e-05                0.9999989          5
## 1764 GO:0030026            2.323359e-05                0.9999997          3
## 254  GO:0006096            5.543201e-05                0.9999966          5
## 612  GO:0006879            6.469221e-05                0.9999989          3
## 1114 GO:0010039            7.167439e-05                0.9999987          3
## 1952 GO:0032869            7.691956e-05                0.9999986          3
## 1462 GO:0015760            9.484983e-05                0.9999997          2
## 2487 GO:0045893            1.310402e-04                0.9999802          8
## 1798 GO:0030418            1.891972e-04                0.9999990          2
## 1449 GO:0015714            1.979116e-04                0.9999990          2
## 1438 GO:0015689            2.211728e-04                0.9999988          2
##      numInCat                                                term ontology
## 3114       62              cellular response to ethylene stimulus       BP
## 3164       52                   cellular response to nitric oxide       BP
## 3094       77                       cellular response to iron ion       BP
## 2907       69                                iron ion homeostasis       BP
## 359      2992          regulation of transcription, DNA-templated       BP
## 1195       53                                 response to nitrate       BP
## 613        20              intracellular sequestering of iron ion       BP
## 1760      130                                 metal ion transport       BP
## 1764       21                  cellular manganese ion homeostasis       BP
## 254       138                                  glycolytic process       BP
## 612        29                       cellular iron ion homeostasis       BP
## 1114       33                                response to iron ion       BP
## 1952       30               cellular response to insulin stimulus       BP
## 1462        5                       glucose-6-phosphate transport       BP
## 2487      570 positive regulation of transcription, DNA-templated       BP
## 1798        8                  nicotianamine biosynthetic process       BP
## 1449        7                       phosphoenolpyruvate transport       BP
## 1438        7                             molybdate ion transport       BP
##      over_represented_padjust
## 3114             0.000000e+00
## 3164             0.000000e+00
## 3094             7.435558e-08
## 2907             1.329362e-03
## 359              7.053851e-03
## 1195             7.053851e-03
## 613              8.174561e-03
## 1760             9.781343e-03
## 1764             9.781343e-03
## 254              2.100319e-02
## 612              2.228352e-02
## 1114             2.241909e-02
## 1952             2.241909e-02
## 1462             2.567043e-02
## 2487             3.310074e-02
## 1798             4.411100e-02
## 1449             4.411100e-02
## 1438             4.655688e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120            1.723122e-11                1.0000000          7
## 2253 GO:0042742            6.152850e-08                1.0000000         16
## 2750 GO:0050832            1.527669e-06                0.9999998         11
## 1031 GO:0009863            1.866709e-05                0.9999995          4
## 2230 GO:0042372            3.841027e-05                0.9999995          3
## 900  GO:0009627            5.003514e-05                0.9999970          5
## 894  GO:0009617            6.392564e-05                0.9999927          7
## 975  GO:0009759            6.961346e-05                0.9999988          3
## 855  GO:0009409            7.064222e-05                0.9999859         11
## 2790 GO:0051245            9.537243e-05                0.9999997          2
## 1166 GO:0010112            9.614152e-05                0.9999982          3
## 944  GO:0009697            1.070904e-04                0.9999979          3
## 1210 GO:0010200            1.072639e-04                0.9999867          7
## 3438 GO:1900056            1.520096e-04                0.9999966          3
## 2301 GO:0043069            1.719698e-04                0.9999959          3
## 1858 GO:0031348            1.811769e-04                0.9999908          4
## 557  GO:0006749            1.932567e-04                0.9999899          4
##      numInCat                                             term ontology
## 1174       27                   camalexin biosynthetic process       BP
## 2253      726                    defense response to bacterium       BP
## 2750      469                       defense response to fungus       BP
## 1031       38        salicylic acid mediated signaling pathway       BP
## 2230       18               phylloquinone biosynthetic process       BP
## 900       104                     systemic acquired resistance       BP
## 894       241                            response to bacterium       BP
## 975        19        indole glucosinolate biosynthetic process       BP
## 855       696                                 response to cold       BP
## 2790        4 negative regulation of cellular defense response       BP
## 1166       22       regulation of systemic acquired resistance       BP
## 944        21              salicylic acid biosynthetic process       BP
## 1210      286                               response to chitin       BP
## 3438       26           negative regulation of leaf senescence       BP
## 2301       30     negative regulation of programmed cell death       BP
## 1858       64          negative regulation of defense response       BP
## 557        85                    glutathione metabolic process       BP
##      over_represented_padjust
## 1174             6.528911e-08
## 2253             1.165657e-04
## 2750             1.929446e-03
## 1031             1.768240e-02
## 2230             2.910731e-02
## 900              2.974037e-02
## 894              2.974037e-02
## 975              2.974037e-02
## 855              2.974037e-02
## 2790             3.126329e-02
## 1166             3.126329e-02
## 944              3.126329e-02
## 1210             3.126329e-02
## 3438             4.114032e-02
## 2301             4.290496e-02
## 1858             4.290496e-02
## 557              4.307350e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816            9.333650e-07                0.9999999          7
## 3708 GO:2000022            1.751096e-06                0.9999999          6
## 894  GO:0009617            1.952047e-06                0.9999998          9
## 1857 GO:0031347            5.230519e-05                0.9999968          5
##      numInCat                                                    term ontology
## 1001       97 defense response to bacterium, incompatible interaction       BP
## 3708       89  regulation of jasmonic acid mediated signaling pathway       BP
## 894       241                                   response to bacterium       BP
## 1857       95                          regulation of defense response       BP
##      over_represented_padjust
## 1001              0.002465435
## 3708              0.002465435
## 894               0.002465435
## 1857              0.049546090
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345            0.000000e+00                1.0000000          9
## 1181 GO:0010143            1.631724e-08                1.0000000          5
## 2917 GO:0055114            2.704739e-07                0.9999999         19
## 514  GO:0006629            2.154168e-06                0.9999999          7
## 515  GO:0006631            2.635121e-06                0.9999999          5
## 1531 GO:0016042            3.108001e-05                0.9999976          6
## 1527 GO:0016024            6.205455e-05                0.9999990          3
##      numInCat                                    term ontology
## 1291       41            suberin biosynthetic process       BP
## 1181       29              cutin biosynthetic process       BP
## 2917     1923             oxidation-reduction process       BP
## 514       220                 lipid metabolic process       BP
## 515        85            fatty acid metabolic process       BP
## 1531      229                 lipid catabolic process       BP
## 1527       29 CDP-diacylglycerol biosynthetic process       BP
##      over_represented_padjust
## 1291             0.000000e+00
## 1181             3.091302e-05
## 2917             3.416085e-04
## 514              1.996895e-03
## 515              1.996895e-03
## 1531             1.962703e-02
## 1527             3.358924e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1530 GO:0016036            3.257160e-06                0.9999999          5
## 3219 GO:0080040            1.654299e-05                1.0000000          2
##      numInCat                                                             term
## 1530      148                        cellular response to phosphate starvation
## 3219        5 positive regulation of cellular response to phosphate starvation
##      ontology over_represented_padjust
## 1530       BP               0.01234138
## 3219       BP               0.03134070
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742            2.034123e-17                1.0000000         24
## 423  GO:0006468            1.807727e-12                1.0000000         29
## 1210 GO:0010200            7.269714e-08                1.0000000          9
## 972  GO:0009751            4.471802e-07                1.0000000          9
## 2750 GO:0050832            1.489589e-06                0.9999998         10
## 185  GO:0002229            1.667994e-05                0.9999988          6
## 638  GO:0006952            7.042662e-05                0.9999827         14
## 1174 GO:0010120            7.723694e-05                0.9999986          3
## 1858 GO:0031348            9.506968e-05                0.9999959          4
## 3708 GO:2000022            1.129933e-04                0.9999949          4
##      numInCat                                                   term ontology
## 2253      726                          defense response to bacterium       BP
## 423      1484                                protein phosphorylation       BP
## 1210      286                                     response to chitin       BP
## 972       347                             response to salicylic acid       BP
## 2750      469                             defense response to fungus       BP
## 185       126                          defense response to oomycetes       BP
## 638      1165                                       defense response       BP
## 1174       27                         camalexin biosynthetic process       BP
## 1858       64                negative regulation of defense response       BP
## 3708       89 regulation of jasmonic acid mediated signaling pathway       BP
##      over_represented_padjust
## 2253             7.707293e-14
## 423              3.424739e-09
## 1210             9.181649e-05
## 972              4.235915e-04
## 2750             1.128811e-03
## 185              1.053338e-02
## 638              3.658134e-02
## 1174             3.658134e-02
## 1858             4.002434e-02
## 3708             4.281315e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1737 GO:0019761            2.032233e-05                0.9999994          4
## 2997 GO:0070179            2.418116e-05                1.0000000          2
##      numInCat                               term ontology
## 1737       69 glucosinolate biosynthetic process       BP
## 2997        4      D-serine biosynthetic process       BP
##      over_represented_padjust
## 1737                0.0458112
## 2997                0.0458112
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200            4.498580e-11                1.0000000         19
## 914  GO:0009644            9.502988e-08                1.0000000         10
## 359  GO:0006355            1.299821e-07                1.0000000         58
## 3125 GO:0071456            8.872693e-07                1.0000000          6
## 3708 GO:2000022            2.292534e-06                0.9999998          8
## 2750 GO:0050832            3.085071e-06                0.9999993         17
## 638  GO:0006952            4.156627e-06                0.9999986         29
## 2239 GO:0042542            4.285154e-06                0.9999995          9
## 1919 GO:0032268            8.024532e-06                1.0000000          3
## 854  GO:0009408            8.921277e-06                0.9999983         13
## 58   GO:0000302            3.385541e-05                0.9999966          7
## 853  GO:0009407            3.481444e-05                0.9999973          6
## 2795 GO:0051259            3.977016e-05                0.9999987          4
## 170  GO:0001944            4.197116e-05                0.9999976          5
## 3465 GO:1900457            9.998220e-05                0.9999983          3
## 557  GO:0006749            1.469226e-04                0.9999849          6
##      numInCat                                                     term ontology
## 1210      286                                       response to chitin       BP
## 914       109                         response to high light intensity       BP
## 359      2992               regulation of transcription, DNA-templated       BP
## 3125       35                             cellular response to hypoxia       BP
## 3708       89   regulation of jasmonic acid mediated signaling pathway       BP
## 2750      469                               defense response to fungus       BP
## 638      1165                                         defense response       BP
## 2239      130                            response to hydrogen peroxide       BP
## 1919        5         regulation of cellular protein metabolic process       BP
## 854       305                                         response to heat       BP
## 58         96                      response to reactive oxygen species       BP
## 853        67                                  toxin catabolic process       BP
## 2795       22                          protein complex oligomerization       BP
## 170        41                                  vasculature development       BP
## 3465       10 regulation of brassinosteroid mediated signaling pathway       BP
## 557        85                            glutathione metabolic process       BP
##      over_represented_padjust
## 1210             1.704512e-07
## 914              1.641674e-04
## 359              1.641674e-04
## 3125             8.404659e-04
## 3708             1.737282e-03
## 2750             1.948222e-03
## 638              2.029556e-03
## 2239             2.029556e-03
## 1919             3.378328e-03
## 854              3.380272e-03
## 58               1.099266e-02
## 853              1.099266e-02
## 2795             1.135919e-02
## 170              1.135919e-02
## 3465             2.525550e-02
## 557              3.479310e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##      category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103            9.838045e-10                        1          5
##    numInCat                 term ontology over_represented_padjust
## 26       24 sulfate assimilation       BP             3.727635e-06
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441            9.629362e-08                1.0000000          3
## 1292 GO:0010350            9.046552e-06                1.0000000          2
## 3096 GO:0071286            9.046552e-06                1.0000000          2
## 3104 GO:0071325            9.046552e-06                1.0000000          2
## 3197 GO:0072709            9.046552e-06                1.0000000          2
## 1838 GO:0031115            1.460646e-05                1.0000000          2
## 859  GO:0009414            1.567843e-05                0.9999986          7
## 2844 GO:0051592            2.317204e-05                1.0000000          2
## 1619 GO:0018008            2.655419e-05                1.0000000          2
## 1840 GO:0031117            2.684154e-05                1.0000000          2
## 3201 GO:0075733            2.734060e-05                0.9999999          2
## 1248 GO:0010248            2.913482e-05                0.9999999          2
## 3093 GO:0071280            2.918774e-05                0.9999999          2
## 2162 GO:0035865            2.961950e-05                0.9999999          2
## 967  GO:0009744            3.347001e-05                0.9999989          4
## 646  GO:0006979            5.215913e-05                0.9999956          6
## 2829 GO:0051511            6.257254e-05                0.9999998          2
## 1282 GO:0010325            1.318124e-04                0.9999994          2
## 736  GO:0007568            1.789968e-04                0.9999956          3
## 3081 GO:0071219            1.826884e-04                0.9999990          2
## 441  GO:0006499            2.422844e-04                0.9999934          3
##      numInCat
## 1695        7
## 1292        4
## 3096        4
## 3104        4
## 3197        4
## 1838        5
## 859       596
## 2844        6
## 1619        6
## 1840        7
## 3201        7
## 1248        6
## 3093        7
## 2162        7
## 967       136
## 646       502
## 2829        9
## 1282       11
## 736        85
## 3081       14
## 441        89
##                                                                        term
## 1695                             tryptophan catabolic process to kynurenine
## 1292                              cellular response to magnesium starvation
## 3096                                     cellular response to magnesium ion
## 3104                                 cellular response to mannitol stimulus
## 3197                                          cellular response to sorbitol
## 1838                      negative regulation of microtubule polymerization
## 859                                           response to water deprivation
## 2844                                                response to calcium ion
## 1619                           N-terminal peptidyl-glycine N-myristoylation
## 1840                    positive regulation of microtubule depolymerization
## 3201                                       intracellular transport of virus
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 3093                                        cellular response to copper ion
## 2162                                     cellular response to potassium ion
## 967                                                     response to sucrose
## 646                                            response to oxidative stress
## 2829                      negative regulation of unidimensional cell growth
## 1282                  raffinose family oligosaccharide biosynthetic process
## 736                                                                   aging
## 3081                      cellular response to molecule of bacterial origin
## 441                                       N-terminal protein myristoylation
##      ontology over_represented_padjust
## 1695       BP             0.0003648565
## 1292       BP             0.0068554769
## 3096       BP             0.0068554769
## 3104       BP             0.0068554769
## 3197       BP             0.0068554769
## 1838       BP             0.0080163051
## 859        BP             0.0080163051
## 2844       BP             0.0080163051
## 1619       BP             0.0080163051
## 1840       BP             0.0080163051
## 3201       BP             0.0080163051
## 1248       BP             0.0080163051
## 3093       BP             0.0080163051
## 2162       BP             0.0080163051
## 967        BP             0.0084545246
## 646        BP             0.0123519337
## 2829       BP             0.0139463151
## 1282       BP             0.0277465042
## 736        BP             0.0346103103
## 3081       BP             0.0346103103
## 441        BP             0.0437150254
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 853 GO:0009407             7.21299e-06                0.9999998          4
##     numInCat                    term ontology over_represented_padjust
## 853       67 toxin catabolic process       BP               0.02733002
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1572 GO:0016310            4.546280e-06                0.9999993         12
## 2750 GO:0050832            9.987204e-06                0.9999988          9
## 423  GO:0006468            1.451836e-05                0.9999962         18
##      numInCat                       term ontology over_represented_padjust
## 1572      685            phosphorylation       BP               0.01722585
## 2750      469 defense response to fungus       BP               0.01833669
## 423      1484    protein phosphorylation       BP               0.01833669
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1166 GO:0010112            5.131427e-11                1.0000000          7
## 638  GO:0006952            1.205807e-09                1.0000000         28
## 890  GO:0009611            4.343094e-07                0.9999999         14
## 899  GO:0009626            4.232341e-06                0.9999996          8
## 973  GO:0009753            7.729912e-06                0.9999988         11
## 2253 GO:0042742            1.663149e-05                0.9999959         16
## 1205 GO:0010193            1.870619e-05                0.9999991          5
## 2237 GO:0042538            9.358914e-05                0.9999910          6
##      numInCat                                       term ontology
## 1166       22 regulation of systemic acquired resistance       BP
## 638      1165                           defense response       BP
## 890       419                       response to wounding       BP
## 899       140         plant-type hypersensitive response       BP
## 973       338                  response to jasmonic acid       BP
## 2253      726              defense response to bacterium       BP
## 1205       54                          response to ozone       BP
## 2237      132             hyperosmotic salinity response       BP
##      over_represented_padjust
## 1166             1.944298e-07
## 638              2.284402e-06
## 890              5.485327e-04
## 899              4.009085e-03
## 973              5.857727e-03
## 2253             1.012539e-02
## 1205             1.012539e-02
## 2237             4.432616e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 946 GO:0009699            5.793939e-06                0.9999999          4
## 890 GO:0009611            6.268525e-06                0.9999993          9
## 602 GO:0006855            7.426144e-06                0.9999996          6
##     numInCat                                 term ontology
## 946       34 phenylpropanoid biosynthetic process       BP
## 890      419                 response to wounding       BP
## 602      130         drug transmembrane transport       BP
##     over_represented_padjust
## 946               0.00937922
## 890               0.00937922
## 602               0.00937922
# using unnest(): expand the `GO_result` column of `temp` into regular rows and
# columns, then write the flattened table to a CSV file.
# The output path is passed positionally (second argument of write_csv()), which
# is the same argument the original addressed by name.
temp %>%
  unnest(GO_result) %>%
  write_csv("../output/twoafternoon.any.trtsoil.DEG.Kmeans.15cluster.csv")

Diurnal DEGS (3/4 days) clustering and cluster ORAs

# Print the annotated diurnal DEG table for a quick visual check.
diurnal34.time.DEGs.all.v3.0anno
# scaling expression data
# scale() standardises columns, so the matrix is transposed first (and back
# afterwards) to centre/scale each transcript (row) across samples.
# NOTE(review): assumes the first column of cpm.timecourse.v3.0 is
# transcript_ID and all remaining columns are numeric samples - confirm.
# transcript_ID is stored as character (never factor) so that later joins on it
# do not depend on the R version's stringsAsFactors default and do not emit
# "joining factor and character vector" coercion warnings.
cpm.timecourse.v3.0.scale <- cpm.timecourse.v3.0[, -1] %>%
  t() %>%
  scale() %>%
  t() %>%
  as_tibble() %>%
  bind_cols(
    data.frame(
      transcript_ID = as.character(cpm.timecourse.v3.0$transcript_ID),
      stringsAsFactors = FALSE
    ),
    .
  )

# diurnal 3and4 days DEG expression data (scaled), in long format:
#   1. keep only transcripts that are DEGs (FDR < 0.05),
#   2. gather to one row per transcript x sample,
#   3. attach the sample description,
#   4. keep samples from sampling days "03" and "04".
# The result is a long table; the wide form (after spread) is 6774 x 121.
cpm.timecourse.v3.0.scale.diurnal34.time.DEG <- cpm.timecourse.v3.0.scale %>%
  inner_join(
    diurnal34.time.DEGs.all.v3.0anno %>%
      filter(FDR < 0.05) %>%
      dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  gather(sample, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  filter(sampling_day %in% c("03", "04"))
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# Sanity check: cross-tabulate sampling day against sample. Every sample should
# have all of its rows under exactly one sampling day.
cpm.timecourse.v3.0.scale.diurnal34.time.DEG %>%
  dplyr::select(sampling_day, sample) %>%
  table() # OK
##             sample
## sampling_day 1a1_q_002_S1_R1_001 1a2_q_003_S2_R1_001 1a4_q_005_S4_R1_001
##           03                   0                   0                6774
##           04                6774                6774                   0
##             sample
## sampling_day 1a6_q_007_S6_R1_001 1b2_q_013_S10_R1_001 1b4_q_015_S12_R1_001
##           03                6774                    0                 6774
##           04                   0                 6774                    0
##             sample
## sampling_day 1b5_q_016_S13_R1_001 1b8_q_022_S16_R1_001 1c1_q_023_S17_R1_001
##           03                 6774                    0                 6774
##           04                    0                 6774                    0
##             sample
## sampling_day 1c5_q_027_S21_R1_001 1c7_q_031_S23_R1_001 1c8_q_032_S24_R1_001
##           03                    0                    0                    0
##           04                 6774                 6774                 6774
##             sample
## sampling_day 1d2_q_037_S26_R1_001 1d6_q_044_S30_R1_001 1e2_q_050_S34_R1_001
##           03                    0                 6774                 6774
##           04                 6774                    0                    0
##             sample
## sampling_day 1f2_q_062_S42_R1_001 1f4_q_066_S44_R1_001 1f5_q_068_S45_R1_001
##           03                 6774                    0                 6774
##           04                    0                 6774                    0
##             sample
## sampling_day 1f7_q_071_S47_R1_001 1f8_q_072_S48_R1_001 1g7_q_082_S55_R1_001
##           03                 6774                    0                 6774
##           04                    0                 6774                    0
##             sample
## sampling_day 1h5_q_091_S61_R1_001 1h6_q_095_S62_R1_001 1h7_q_096_S63_R1_001
##           03                 6774                    0                 6774
##           04                    0                 6774                    0
##             sample
## sampling_day 1i1_q_098_S65_R1_001 1i4_q_106_S68_R1_001 1i6_q_108_S70_R1_001
##           03                    0                 6774                    0
##           04                 6774                    0                 6774
##             sample
## sampling_day 1i8_q_111_S72_R1_001 1j1_q_112_S73_R1_001 1j2_q_113_S74_R1_001
##           03                    0                 6774                    0
##           04                 6774                    0                 6774
##             sample
## sampling_day 1j4_q_115_S76_R1_001 1j5_q_116_S77_R1_001 1j6_q_117_S78_R1_001
##           03                    0                 6774                 6774
##           04                 6774                    0                    0
##             sample
## sampling_day 1j8_q_119_S80_R1_001 1k4_q_127_S84_R1_001 1k7_q_134_S87_R1_001
##           03                    0                    0                    0
##           04                 6774                 6774                 6774
##             sample
## sampling_day 1l1_q_136_S89_R1_001 1l4_q_139_S92_R1_001 1l7_q_143_S95_R1_001
##           03                 6774                 6774                 6774
##           04                    0                    0                    0
##             sample
## sampling_day 1l8_q_144_S96_R1_001 2a1_q_146_S97_R1_001 2a4_q_150_S100_R1_001
##           03                    0                 6774                     0
##           04                 6774                    0                  6774
##             sample
## sampling_day 2a5_q_151_S101_R1_001 2a6_q_152_S102_R1_001 2b3_q_160_S107_R1_001
##           03                  6774                     0                  6774
##           04                     0                  6774                     0
##             sample
## sampling_day 2c7_q_178_S119_R1_001 2c8_q_179_S120_R1_001 2d1_q_180_S121_R1_001
##           03                     0                     0                     0
##           04                  6774                  6774                  6774
##             sample
## sampling_day 2d3_q_182_S123_R1_001 2d5_q_184_S125_R1_001 2e2_q_196_S130_R1_001
##           03                     0                  6774                  6774
##           04                  6774                     0                     0
##             sample
## sampling_day 2e4_q_199_S132_R1_001 2e5_q_200_S133_R1_001 2e7_q_201_S135_R1_001
##           03                  6774                     0                  6774
##           04                     0                  6774                     0
##             sample
## sampling_day 2e8_q_203_S136_R1_001 2f2_q_205_S138_R1_001 2f4_q_208_S140_R1_001
##           03                  6774                     0                     0
##           04                     0                  6774                  6774
##             sample
## sampling_day 2f6_q_212_S142_R1_001 2f7_q_213_S143_R1_001 2f8_q_216_S144_R1_001
##           03                     0                  6774                  6774
##           04                  6774                     0                     0
##             sample
## sampling_day 2g3_q_220_S147_R1_001 2g5_q_226_S149_R1_001 2g7_q_228_S151_R1_001
##           03                     0                     0                  6774
##           04                  6774                  6774                     0
##             sample
## sampling_day 2h1_q_230_S153_R1_001 2h5_q_236_S157_R1_001 2h8_q_240_S160_R1_001
##           03                  6774                  6774                  6774
##           04                     0                     0                     0
##             sample
## sampling_day 2i5_q_247_S165_R1_001 2i8_q_249_S168_R1_001 2j4_q_254_S172_R1_001
##           03                     0                     0                  6774
##           04                  6774                  6774                     0
##             sample
## sampling_day 2j5_q_255_S173_R1_001 2k3_q_266_S179_R1_001 2k5_q_271_S181_R1_001
##           03                     0                  6774                     0
##           04                  6774                     0                  6774
##             sample
## sampling_day 2k6_q_272_S182_R1_001 2k7_q_273_S183_R1_001 2k8_q_275_S184_R1_001
##           03                  6774                     0                  6774
##           04                     0                  6774                     0
##             sample
## sampling_day 2l1_q_276_S185_R1_001 2l3_q_280_S187_R1_001 2l4_q_282_S188_R1_001
##           03                     0                  6774                     0
##           04                  6774                     0                  6774
##             sample
## sampling_day 2l5_q_285_S189_R1_001 2l6_q_286_S190_R1_001 3a2_q_292_S194_R1_001
##           03                  6774                     0                  6774
##           04                     0                  6774                     0
##             sample
## sampling_day 3a4_q_294_S196_R1_001 3a6_q_296_S198_R1_001 3b2_q_301_S202_R1_001
##           03                     0                     0                  6774
##           04                  6774                  6774                     0
##             sample
## sampling_day 3b7_q_307_S207_R1_001 3b8_q_308_S208_R1_001 3c1_q_311_S209_R1_001
##           03                  6774                     0                  6774
##           04                     0                  6774                     0
##             sample
## sampling_day 3c3_q_314_S211_R1_001 3c4_q_315_S212_R1_001 3c5_q_317_S213_R1_001
##           03                  6774                     0                  6774
##           04                     0                  6774                     0
##             sample
## sampling_day 3c6_q_318_S214_R1_001 3d5_q_330_S221_R1_001 3d7_q_334_S223_R1_001
##           03                  6774                  6774                     0
##           04                     0                     0                  6774
##             sample
## sampling_day 3d8_q_336_S224_R1_001 3e2_q_342_S226_R1_001 3e3_q_343_S227_R1_001
##           03                     0                     0                  6774
##           04                  6774                  6774                     0
##             sample
## sampling_day 3e4_q_344_S228_R1_001 3e6_q_350_S230_R1_001 3f1_q_353_S233_R1_001
##           03                     0                     0                  6774
##           04                  6774                  6774                     0
##             sample
## sampling_day 3f3_q_354_S235_R1_001 3g2_q_364_S242_R1_001 3g4_q_366_S244_R1_001
##           03                     0                     0                  6774
##           04                  6774                  6774                     0
##             sample
## sampling_day 3g5_q_367_S245_R1_001 3h7_q_384_S255_R1_001 3i1_q_388_S257_R1_001
##           03                     0                     0                     0
##           04                  6774                  6774                  6774
##             sample
## sampling_day 3i2_q_389_S258_R1_001 3i3_q_391_S259_R1_001 3i6_q_395_S262_R1_001
##           03                  6774                  6774                  6774
##           04                     0                     0                     0
##             sample
## sampling_day 3i7_q_396_S263_R1_001 3j3_q_401_S267_R1_001 3j4_q_403_S268_R1_001
##           03                  6774                     0                     0
##           04                     0                  6774                  6774
##             sample
## sampling_day 3j6_q_407_S270_R1_001 3j7_q_409_S271_R1_001 3k1_q_411_S273_R1_001
##           03                  6774                  6774                     0
##           04                     0                     0                  6774
##             sample
## sampling_day 3k5_q_415_S277_R1_001 3l2_q_423_S282_R1_001 3l3_q_424_S283_R1_001
##           03                     0                  6774                  6774
##           04                  6774                     0                     0
##             sample
## sampling_day 3l4_q_426_S284_R1_001 3l5_q_427_S285_R1_001 3l7_q_429_S287_R1_001
##           03                  6774                     0                     0
##           04                     0                  6774                  6774
# spread: reshape the long table back to wide format
# (one row per transcript, one column per sample).
# No `fill` is supplied on purpose: a third positional argument to spread() is
# interpreted as `fill`, so passing `-1` there would silently replace any
# missing transcript/sample combination with an expression value of -1.
# Leaving missing cells as NA makes such a problem visible (kmeans() rejects NA).
cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread <-
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
dim(cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread) # [1] 6774  121
## [1] 6774  121
# calculate wss (within-groups sum of squares) for k = 1..30 clusters and draw
# the elbow plot used to choose the number of clusters.
# kmeans() starts from randomly chosen centres, so fix the seed to make the
# curve reproducible between runs.
set.seed(20)
wss <- numeric(30) # preallocated: wss[k] holds the value for k clusters
# k = 1: total sum of squares = (n - 1) * sum of the per-column variances
wss[1] <- (nrow(cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1]) - 1) *
  sum(apply(cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1], 2, var))
for (i in 2:30) {
  # iter.max = 20: the default iter.max = 10 gave a
  # "did not converge in 10 iterations" warning. Solution:
  # https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
  # tot.withinss is the sum of the per-cluster withinss values.
  wss[i] <- kmeans(
    cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
    centers = i,
    iter.max = 20
  )$tot.withinss
}
plot(1:30, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")

Let’s perform the actual clustering using K=6:

# Fixed seed so that the random starts of kmeans() are the same on every run.
set.seed(20)
# K-means with 6 centres on the scaled expression columns (first column, the
# transcript ID, is dropped); best of 1000 random starts, up to 20 iterations.
kClust.diurnal34.time.6 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
  centers = 6,
  nstart = 1000,
  iter.max = 20
)
# cluster assignment (one integer per row of the input)
kClusters.diurnal34.time.6 <- kClust.diurnal34.time.6$cluster
# number of rows (genes) assigned to each cluster; the cluster id is converted
# to character
cluster.diurnal34.time.6.num <- tibble(cluster = kClusters.diurnal34.time.6) %>%
  count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal34.time.6.num

Now we can calculate the cluster ‘cores’ (aka centroids) with `clust.centroid`, the function that finds the centroid of cluster i:

# Apply clust.centroid() once per cluster id (ids passed as character strings,
# in increasing order); sapply() binds the results into one column per cluster.
# The scaled expression columns and the cluster assignment vector are forwarded
# to clust.centroid() as its second and third arguments.
kClustcentroids.diurnal34.time.6 <- sapply(
  as.character(sort(unique(kClusters.diurnal34.time.6))),
  clust.centroid,
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
  kClusters.diurnal34.time.6
)
# show the first rows (rows = samples, columns = clusters)
head(kClustcentroids.diurnal34.time.6)
##                                1           2          3           4           5
## 1a1_q_002_S1_R1_001  -0.08766409 -0.28375827  0.3043848 -0.46588371  0.21342401
## 1a2_q_003_S2_R1_001  -0.88131939  0.05119095 -0.2101213  0.07575289  0.06628338
## 1a4_q_005_S4_R1_001  -0.55614078  0.07581021 -0.2089740 -0.30749340  0.02435838
## 1a6_q_007_S6_R1_001  -0.23204612 -0.19617309 -0.2666684  0.31211120  0.72286399
## 1b2_q_013_S10_R1_001 -0.10032508 -0.17246169  0.4241383  1.28913194 -0.22397835
## 1b4_q_015_S12_R1_001 -0.13558966 -0.22616853  0.3939676  1.20510364 -0.31804261
##                                 6
## 1a1_q_002_S1_R1_001  -0.266144832
## 1a2_q_003_S2_R1_001   0.747608834
## 1a4_q_005_S4_R1_001   1.090735726
## 1a6_q_007_S6_R1_001  -0.004685789
## 1b2_q_013_S10_R1_001  0.131912617
## 1b4_q_015_S12_R1_001  0.279213553

Plotting the centroids to see how they behave: tidyverse version

# adding sample description to data:
# the centroid matrix (rows = samples, columns = clusters) is turned into a long
# tibble with one row per sample x cluster, then joined with the sample
# description (by sample) and with the per-cluster counts (by cluster).
# cluster.n is a label made of the cluster id and, on a second line, its count
# in parentheses.
data.sample <- kClustcentroids.diurnal34.time.6 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = "cluster", value = "value", -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal34.time.6.num, by = "cluster") %>%
  mutate(
    cluster.n = glue::glue('{cluster2} \n({n2})', cluster2 = cluster, n2 = n)
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean:
# one row per sampling_time x soil_trt x cluster combination, holding the mean
# centroid value of that combination plus the descriptive columns needed for
# plotting.
# The descriptive columns are reduced to the first row of each combination with
# distinct(.keep_all = TRUE). This replaces a slice() over a hard-coded
# 1:1800 index, which was only correct for a table of exactly 1800 rows.
# NOTE(review): sampling_day is taken from the first row of each combination,
# so it is arbitrary when a combination spans several sampling days.
data.group <- data.sample %>%
  unite(
    "sampling_time.soil.cluster",
    c("sampling_time", "soil_trt", "cluster"),
    remove = FALSE
  ) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite(
        "sampling_time.soil.cluster",
        c("sampling_time", "soil_trt", "cluster"),
        remove = FALSE
      ) %>%
      dplyr::select(
        "sampling_time.soil.cluster", "sampling_time", "sampling_day",
        "soil_trt", "cluster.n", "cluster"
      ) %>%
      distinct(sampling_time.soil.cluster, .keep_all = TRUE),
    by = "sampling_time.soil.cluster"
  )
# plot: per-sample centroid values (jittered points) plus the group-mean line,
# faceted by cluster (rows) and sampling time (columns)
p6.diurnal34.time <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster), shape = sampling_day)
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    title = "K-means clustering of diurnal DEGs (day 3 and 4): six clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p6.diurnal34.time

# Arguments are named in full instead of relying on `file` partially matching
# `filename` and on the plot falling into the next free positional slot.
ggsave(
  filename = "../output/diurnal34.time.DEG.Kmean.6clusters.png",
  plot = p6.diurnal34.time,
  width = 11,
  height = 15
)

Let’s perform the actual clustering using K=15:

# K-means with 15 centers on the scaled expression columns
# (the first column is dropped before clustering); seeded for reproducibility.
set.seed(20)
kClust.diurnal34.time.15 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
  centers = 15,
  nstart = 1000,
  iter.max = 20
)
kClusters.diurnal34.time.15 <- kClust.diurnal34.time.15$cluster
# cluster sizes, with the cluster id stored as character so it can be joined
# against the gathered centroid table
cluster.diurnal34.time.15.num <- tibble(cluster = kClusters.diurnal34.time.15) %>%
  dplyr::count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal34.time.15.num

Now we can calculate the cluster ‘cores’ (a.k.a. centroids), using the function that finds the centroid of cluster i (`clust.centroid`):

# Apply clust.centroid to every cluster id; the result has one column per cluster.
kClustcentroids.diurnal34.time.15 <- sapply(
  levels(factor(kClusters.diurnal34.time.15)),
  clust.centroid,
  cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread[, -1],
  kClusters.diurnal34.time.15
)
head(kClustcentroids.diurnal34.time.15)
##                               1          2          3          4           5
## 1a1_q_002_S1_R1_001   0.2885866  0.2378342 -0.6047991 -0.5744638 -0.25382905
## 1a2_q_003_S2_R1_001  -0.1328990 -0.7039343 -0.2884263 -0.6839978  0.82912505
## 1a4_q_005_S4_R1_001  -0.4752669 -0.4612446 -0.5266081  1.0424779  1.11326429
## 1a6_q_007_S6_R1_001  -0.6306024  0.2121359  0.5424499  0.1902676  0.03288627
## 1b2_q_013_S10_R1_001  0.4841676 -0.1385557  1.4998500  0.6134886 -0.28789826
## 1b4_q_015_S12_R1_001  0.3289299 -0.2487495  1.3233530  0.6989989 -0.10277480
##                               6          7          8           9          10
## 1a1_q_002_S1_R1_001   0.4128140  0.3867571 -0.1960882 -0.37242665 -0.04356712
## 1a2_q_003_S2_R1_001  -0.7206098 -0.5591857  0.9373677 -0.87871570 -0.03295028
## 1a4_q_005_S4_R1_001  -0.2200417 -0.3096520  0.6308335 -0.60801022 -0.07359736
## 1a6_q_007_S6_R1_001  -0.4042662  0.5961622 -0.4481958 -0.49284345 -0.08888074
## 1b2_q_013_S10_R1_001  0.1238924 -0.3058549  0.6559920 -0.07016679 -0.01662834
## 1b4_q_015_S12_R1_001  0.2015720 -0.2612193  0.8432035 -0.10168012 -0.01782574
##                               11          12         13          14          15
## 1a1_q_002_S1_R1_001   0.30119260 -0.52449659  0.3969915 -0.17131547 -0.34740533
## 1a2_q_003_S2_R1_001  -0.03425461  0.09808085  0.1716039  1.30592230 -0.20882930
## 1a4_q_005_S4_R1_001   0.03116806  0.18471973 -0.3252010  0.76223845 -0.03195129
## 1a6_q_007_S6_R1_001   0.86162058 -0.51069396  0.3343381  0.37483605  0.42372214
## 1b2_q_013_S10_R1_001 -0.26838510 -0.31092742  0.7993373 -0.02639449 -0.44474854
## 1b4_q_015_S12_R1_001 -0.39770479 -0.42012854  0.7927001 -0.11193483 -0.54695574

Plotting the centroids to see how they behave: tidyverse version

# Long-format centroid table for the 15-cluster solution (one row per
# sample x cluster), annotated with the sample description and cluster size n.
data.sample <- kClustcentroids.diurnal34.time.15 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = cluster, value = value, -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal34.time.15.num, by = "cluster") %>%
  mutate(
    # facet label: cluster id followed, on a new line, by the cluster size
    cluster.n = glue::glue("{cluster2} \n({n2})", cluster2 = cluster, n2 = n)
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, one row per combination (drawn as the group-mean line in the plot).
data.group <- data.sample %>%
  unite("sampling_time.soil.cluster", c("sampling_time", "soil_trt", "cluster"), remove = FALSE) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("sampling_time.soil.cluster", c("sampling_time", "soil_trt", "cluster"), remove = FALSE) %>%
      dplyr::select("sampling_time.soil.cluster", "sampling_time", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "sampling_time.soil.cluster"
  ) %>%
  # Keep the first row of each combination (so sampling_day is whatever that
  # first row carries). This replaces a slice() over a hard-coded 1:1800 index,
  # which was only correct when the joined table had exactly 1800 rows.
  dplyr::distinct(sampling_time.soil.cluster, .keep_all = TRUE)
# plot: per-sample centroid values (jittered points) plus the group-mean line,
# faceted by cluster (rows) and sampling time (columns)
p15.diurnal34.time <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster), shape = sampling_day)
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    title = "K-means clustering of diurnal DEGs (day 3 and 4): fifteen clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p15.diurnal34.time

# Arguments are named in full instead of relying on `file` partially matching
# `filename` and on the plot falling into the next free positional slot.
ggsave(
  filename = "../output/diurnal34.time.DEG.Kmean.15clusters.png",
  plot = p15.diurnal34.time,
  width = 11,
  height = 15
)

Diurnal DEGs (days 13/14): clustering and cluster ORAs

diurnal1314.time.DEGs.all.v3.0anno 
# Scale the expression data per row (each row is centred and scaled across the
# sample columns), then put transcript_ID back as the first column.
cpm.timecourse.v3.0.scale <- t(scale(t(cpm.timecourse.v3.0[, -1]))) %>%
  as_tibble() %>%
  bind_cols(data.frame(transcript_ID = cpm.timecourse.v3.0$transcript_ID), .)

# Diurnal day 13 and 14 DEG expression data (scaled, long format):
# keep DEGs with FDR < 0.05 and only the samples taken on day 13 or 14.
cpm.timecourse.v3.0.scale.diurnal1314.time.DEG <- cpm.timecourse.v3.0.scale %>%
  inner_join(
    diurnal1314.time.DEGs.all.v3.0anno %>%
      filter(FDR < 0.05) %>%
      dplyr::select(genes),
    by = c(transcript_ID = "genes")
  ) %>%
  gather(sample, value, -1) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  filter(sampling_day %in% c("13", "14"))
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# sanity check: cross-tabulate sampling day against sample id
with(
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG,
  table(sampling_day, sample)
) # OK
##             sample
## sampling_day 1a5_q_006_S5_R1_001 1b1_q_012_S9_R1_001 1b3_q_014_S11_R1_001
##           13               11886                   0                11886
##           14                   0               11886                    0
##             sample
## sampling_day 1b6_q_017_S14_R1_001 1b7_q_020_S15_R1_001 1c2_q_024_S18_R1_001
##           13                    0                11886                    0
##           14                11886                    0                11886
##             sample
## sampling_day 1c3_q_025_S19_R1_001 1c4_q_026_S20_R1_001 1c6_q_028_S22_R1_001
##           13                    0                    0                    0
##           14                11886                11886                11886
##             sample
## sampling_day 1d1_q_035_S25_R1_001 1d4_q_040_S28_R1_001 1d5_q_042_S29_R1_001
##           13                    0                11886                11886
##           14                11886                    0                    0
##             sample
## sampling_day 1d7_q_045_S31_R1_001 1d8_q_046_S32_R1_001 1e1_q_048_S33_R1_001
##           13                    0                11886                11886
##           14                11886                    0                    0
##             sample
## sampling_day 1e3_q_053_S35_R1_001 1e4_q_055_S36_R1_001 1e7_q_058_S39_R1_001
##           13                11886                    0                    0
##           14                    0                11886                11886
##             sample
## sampling_day 1f1_q_060_S41_R1_001 1f6_q_070_S46_R1_001 1g1_q_073_S49_R1_001
##           13                11886                11886                    0
##           14                    0                    0                11886
##             sample
## sampling_day 1g2_q_074_S50_R1_001 1g4_q_077_S52_R1_001 1g5_q_080_S53_R1_001
##           13                11886                11886                11886
##           14                    0                    0                    0
##             sample
## sampling_day 1g6_q_081_S54_R1_001 1g8_q_083_S56_R1_001 1h1_q_084_S57_R1_001
##           13                    0                    0                    0
##           14                11886                11886                11886
##             sample
## sampling_day 1h2_q_085_S58_R1_001 1h8_q_097_S64_R1_001 1i3_q_105_S67_R1_001
##           13                11886                    0                    0
##           14                    0                11886                11886
##             sample
## sampling_day 1i5_q_107_S69_R1_001 1i7_q_110_S71_R1_001 1j3_q_114_S75_R1_001
##           13                    0                    0                11886
##           14                11886                11886                    0
##             sample
## sampling_day 1k1_q_120_S81_R1_001 1k3_q_123_S83_R1_001 1k5_q_128_S85_R1_001
##           13                11886                11886                11886
##           14                    0                    0                    0
##             sample
## sampling_day 1l2_q_137_S90_R1_001 1l3_q_138_S91_R1_001 1l5_q_141_S93_R1_001
##           13                11886                11886                    0
##           14                    0                    0                11886
##             sample
## sampling_day 1l6_q_142_S94_R1_001 2a2_q_147_S98_R1_001 2a3_q_148_S99_R1_001
##           13                    0                11886                11886
##           14                11886                    0                    0
##             sample
## sampling_day 2a8_q_154_S104_R1_001 2b1_q_156_S105_R1_001 2b2_q_158_S106_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 2b4_q_161_S108_R1_001 2b5_q_162_S109_R1_001 2b6_q_164_S110_R1_001
##           13                     0                 11886                     0
##           14                 11886                     0                 11886
##             sample
## sampling_day 2c1_q_168_S113_R1_001 2c2_q_169_S114_R1_001 2c5_q_173_S117_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 2c6_q_175_S118_R1_001 2d4_q_183_S124_R1_001 2d6_q_185_S126_R1_001
##           13                     0                     0                     0
##           14                 11886                 11886                 11886
##             sample
## sampling_day 2d8_q_190_S128_R1_001 2e1_q_193_S129_R1_001 2e3_q_198_S131_R1_001
##           13                 11886                     0                 11886
##           14                     0                 11886                     0
##             sample
## sampling_day 2f1_q_204_S137_R1_001 2f3_q_206_S139_R1_001 2f5_q_211_S141_R1_001
##           13                     0                 11886                     0
##           14                 11886                     0                 11886
##             sample
## sampling_day 2g2_q_219_S146_R1_001 2g6_q_227_S150_R1_001 2g8_q_229_S152_R1_001
##           13                 11886                     0                 11886
##           14                     0                 11886                     0
##             sample
## sampling_day 2h2_q_231_S154_R1_001 2h3_q_232_S155_R1_001 2h6_q_237_S158_R1_001
##           13                     0                 11886                     0
##           14                 11886                     0                 11886
##             sample
## sampling_day 2h7_q_238_S159_R1_001 2i2_q_243_S162_R1_001 2i3_q_245_S163_R1_001
##           13                     0                 11886                 11886
##           14                 11886                     0                     0
##             sample
## sampling_day 2i4_q_246_S164_R1_001 2i7_q_248_S167_R1_001 2j2_q_252_S170_R1_001
##           13                 11886                     0                 11886
##           14                     0                 11886                     0
##             sample
## sampling_day 2j3_q_253_S171_R1_001 2j8_q_261_S176_R1_001 2k1_q_263_S177_R1_001
##           13                     0                 11886                 11886
##           14                 11886                     0                     0
##             sample
## sampling_day 2k2_q_265_S178_R1_001 2k4_q_267_S180_R1_001 2l2_q_278_S186_R1_001
##           13                     0                     0                 11886
##           14                 11886                 11886                     0
##             sample
## sampling_day 2l7_q_287_S191_R1_001 2l8_q_288_S192_R1_001 3a3_q_293_S195_R1_001
##           13                     0                     0                 11886
##           14                 11886                 11886                     0
##             sample
## sampling_day 3a5_q_295_S197_R1_001 3a7_q_297_S199_R1_001 3a8_q_299_S200_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 3b1_q_300_S201_R1_001 3b3_q_302_S203_R1_001 3b4_q_303_S204_R1_001
##           13                     0                     0                     0
##           14                 11886                 11886                 11886
##             sample
## sampling_day 3b6_q_306_S206_R1_001 3c2_q_312_S210_R1_001 3c8_q_323_S216_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 3d1_q_324_S217_R1_001 3d2_q_325_S218_R1_001 3d3_q_326_S219_R1_001
##           13                     0                 11886                 11886
##           14                 11886                     0                     0
##             sample
## sampling_day 3d4_q_329_S220_R1_001 3e1_q_339_S225_R1_001 3e5_q_348_S229_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 3e7_q_351_S231_R1_001 3e8_q_352_S232_R1_001 3f5_q_358_S237_R1_001
##           13                     0                 11886                     0
##           14                 11886                     0                 11886
##             sample
## sampling_day 3f6_q_359_S238_R1_001 3g1_q_362_S241_R1_001 3g3_q_365_S243_R1_001
##           13                     0                     0                     0
##           14                 11886                 11886                 11886
##             sample
## sampling_day 3g6_q_369_S246_R1_001 3g7_q_370_S247_R1_001 3g8_q_371_S248_R1_001
##           13                 11886                     0                 11886
##           14                     0                 11886                     0
##             sample
## sampling_day 3h1_q_372_S249_R1_001 3h4_q_376_S252_R1_001 3h6_q_378_S254_R1_001
##           13                 11886                     0                 11886
##           14                     0                 11886                     0
##             sample
## sampling_day 3i5_q_393_S261_R1_001 3i8_q_397_S264_R1_001 3j1_q_398_S265_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 3j2_q_399_S266_R1_001 3j5_q_405_S269_R1_001 3j8_q_410_S272_R1_001
##           13                     0                     0                 11886
##           14                 11886                 11886                     0
##             sample
## sampling_day 3k2_q_412_S274_R1_001 3k3_q_413_S275_R1_001 3k4_q_414_S276_R1_001
##           13                 11886                 11886                     0
##           14                     0                     0                 11886
##             sample
## sampling_day 3k8_q_420_S280_R1_001 3l6_q_428_S286_R1_001 3l8_q_432_S288_R1_001
##           13                     0                 11886                     0
##           14                 11886                     0                 11886
# Spread to wide format: one row per transcript_ID, one column per sample.
# No `fill` is passed, so a missing transcript/sample pair stays NA. The former
# third positional argument `-1` was matched to spread()'s `fill` parameter and
# would have silently inserted -1 as an expression value.
cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread <- cpm.timecourse.v3.0.scale.diurnal1314.time.DEG %>%
  dplyr::select(transcript_ID, sample, value) %>%
  spread(sample, value)
dim(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread) # rows = transcripts; columns = transcript_ID + samples
## [1] 11886   121
# Calculate wss (within-groups sum of squares) for k = 1..30 and draw the
# elbow plot used to choose the number of clusters.
# kmeans() starts from randomly chosen centers, so the scan is seeded to make
# the curve reproducible between runs.
set.seed(20)
wss <- numeric(30)
# k = 1: total sum of squares around the column means
wss[1] <- (nrow(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1]) - 1) *
  sum(apply(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1], 2, var))
for (i in 2:30) {
  # iter.max is raised from the default of 10, which produced
  # "did not converge in 10 iterations". Solution:
  # https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
  wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
                       centers = i, iter.max = 20)$withinss)
}
plot(1:30, wss, type = "b", xlab = "Number of Clusters", ylab = "Within groups sum of squares")

Let’s perform the actual clustering using K=6:

# K-means with 6 centers on the scaled expression columns
# (the first column is dropped before clustering); seeded for reproducibility.
set.seed(20)
kClust.diurnal1314.time.6 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
  centers = 6,
  nstart = 1000,
  iter.max = 20
)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
# cluster assignment for every row of the clustered table
kClusters.diurnal1314.time.6 <- kClust.diurnal1314.time.6$cluster
# cluster sizes, with the cluster id stored as character so it can be joined
# against the gathered centroid table
cluster.diurnal1314.time.6.num <- tibble(cluster = kClusters.diurnal1314.time.6) %>%
  dplyr::count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal1314.time.6.num

Now we can calculate the cluster ‘cores’ (a.k.a. centroids), using the function that finds the centroid of cluster i (`clust.centroid`):

# Apply clust.centroid to every cluster id; the result has one column per cluster.
kClustcentroids.diurnal1314.time.6 <- sapply(
  levels(factor(kClusters.diurnal1314.time.6)),
  clust.centroid,
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
  kClusters.diurnal1314.time.6
)
head(kClustcentroids.diurnal1314.time.6)
##                                1           2           3           4          5
## 1a5_q_006_S5_R1_001   1.00607771 -0.38790092 -0.31463890 -0.26761449  0.5250045
## 1b1_q_012_S9_R1_001   0.34805286 -0.27211138  0.06583565  0.24194275  0.3304202
## 1b3_q_014_S11_R1_001  0.96268730 -0.42494557 -0.56290977  0.36674916  0.5140436
## 1b6_q_017_S14_R1_001 -0.62960914 -0.58599384  0.80073093 -0.11650377  0.9495071
## 1b7_q_020_S15_R1_001  0.99198831 -0.30439734 -0.63553067  0.09323508  0.4032669
## 1c2_q_024_S18_R1_001 -0.01010596  0.03600731  0.06221196  0.36154325 -0.2586159
##                                 6
## 1a5_q_006_S5_R1_001   0.127231417
## 1b1_q_012_S9_R1_001  -0.140044368
## 1b3_q_014_S11_R1_001  0.001924006
## 1b6_q_017_S14_R1_001 -0.166159823
## 1b7_q_020_S15_R1_001 -0.054333075
## 1c2_q_024_S18_R1_001 -0.277725538

Plotting the centroids to see how they behave: tidyverse version

# Long-format centroid table for the 6-cluster solution (one row per
# sample x cluster), annotated with the sample description and cluster size n.
data.sample <- kClustcentroids.diurnal1314.time.6 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = cluster, value = value, -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal1314.time.6.num, by = "cluster") %>%
  mutate(
    # facet label: cluster id followed, on a new line, by the cluster size
    cluster.n = glue::glue("{cluster2} \n({n2})", cluster2 = cluster, n2 = n)
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, one row per combination (drawn as the group-mean line in the plot).
data.group <- data.sample %>%
  unite("sampling_time.soil.cluster", c("sampling_time", "soil_trt", "cluster"), remove = FALSE) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("sampling_time.soil.cluster", c("sampling_time", "soil_trt", "cluster"), remove = FALSE) %>%
      dplyr::select("sampling_time.soil.cluster", "sampling_time", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "sampling_time.soil.cluster"
  ) %>%
  # Keep the first row of each combination (so sampling_day is whatever that
  # first row carries). This replaces a slice() over a hard-coded 1:1800 index,
  # which was only correct when the joined table had exactly 1800 rows.
  dplyr::distinct(sampling_time.soil.cluster, .keep_all = TRUE)
# plot: per-sample centroid values (jittered points) plus the group-mean line,
# faceted by cluster (rows) and sampling time (columns)
p6.diurnal1314.time <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster), shape = sampling_day)
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    # the data are the day 13/14 samples; the title previously said "day 3 and 4"
    title = "K-means clustering of diurnal DEGs (day 13 and 14): six clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p6.diurnal1314.time

# Arguments are named in full instead of relying on `file` partially matching
# `filename` and on the plot falling into the next free positional slot.
ggsave(
  filename = "../output/diurnal1314.time.DEG.Kmean.6clusters.png",
  plot = p6.diurnal1314.time,
  width = 11,
  height = 15
)

Let’s perform the actual clustering using K=15:

# K-means with 15 centers on the scaled expression columns
# (the first column is dropped before clustering); seeded for reproducibility.
set.seed(20)
kClust.diurnal1314.time.15 <- kmeans(
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
  centers = 15,
  nstart = 1000,
  iter.max = 20
)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 594300)
# cluster assignment for every row of the clustered table
kClusters.diurnal1314.time.15 <- kClust.diurnal1314.time.15$cluster
# cluster sizes, with the cluster id stored as character so it can be joined
# against the gathered centroid table
cluster.diurnal1314.time.15.num <- tibble(cluster = kClusters.diurnal1314.time.15) %>%
  dplyr::count(cluster) %>%
  mutate(cluster = as.character(cluster))
cluster.diurnal1314.time.15.num

Now we can calculate the cluster ‘cores’ (a.k.a. centroids), using the function that finds the centroid of cluster i (`clust.centroid`):

# Apply clust.centroid to every cluster id; the result has one column per cluster.
kClustcentroids.diurnal1314.time.15 <- sapply(
  levels(factor(kClusters.diurnal1314.time.15)),
  clust.centroid,
  cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread[, -1],
  kClusters.diurnal1314.time.15
)
head(kClustcentroids.diurnal1314.time.15)
##                                1          2          3          4          5
## 1a5_q_006_S5_R1_001   0.90822348  0.7193382 0.78837550  0.3546389 -0.2685805
## 1b1_q_012_S9_R1_001   0.25532580  0.2161972 1.05435667 -0.1085664 -0.5004343
## 1b3_q_014_S11_R1_001  1.37268253  0.2996500 0.03584396  0.1611360 -0.2226214
## 1b6_q_017_S14_R1_001 -0.63972085  0.2377625 0.15627584 -0.9651293 -0.4043945
## 1b7_q_020_S15_R1_001  0.98105946  0.1122274 0.63255285  0.3856952 -0.4594923
## 1c2_q_024_S18_R1_001  0.05140275 -0.5287403 0.30530714 -0.1455849 -0.5466622
##                                6          7           8          9         10
## 1a5_q_006_S5_R1_001   0.66642092 -0.7975201 -0.31462479 -0.1276376 -0.4146333
## 1b1_q_012_S9_R1_001   0.14143216 -0.5234203  0.23789419  0.5367506  0.2948521
## 1b3_q_014_S11_R1_001  0.07170803 -0.7828654  0.07774152  0.6893292 -0.7533285
## 1b6_q_017_S14_R1_001 -0.29193113 -0.6605074 -0.44737701  0.2126839  0.9142712
## 1b7_q_020_S15_R1_001  0.41865948 -0.6796341  0.12426110  0.2872740 -0.7746740
## 1c2_q_024_S18_R1_001  0.03680482  0.1006786  0.32841856  0.2284217  0.3742139
##                              11         12           13           14         15
## 1a5_q_006_S5_R1_001  0.50952939  1.3870932 -0.002542094  0.006688155 -0.5537269
## 1b1_q_012_S9_R1_001  0.22195302  0.9202516 -0.116463977 -0.133184823 -0.5670371
## 1b3_q_014_S11_R1_001 1.00248063 -0.1448134 -0.072121978  0.056775228  0.5000807
## 1b6_q_017_S14_R1_001 1.44055235  0.1676767 -0.169097545  0.732294749  0.4968466
## 1b7_q_020_S15_R1_001 0.51335099  1.1859876 -0.067612254 -0.188951246 -0.6213241
## 1c2_q_024_S18_R1_001 0.02784435 -0.6198037 -0.105822254 -0.503023534  0.4579171

Plotting the centroids to see how they behave: tidyverse version

# Long-format centroid table for the 15-cluster solution (one row per
# sample x cluster), annotated with the sample description and cluster size n.
data.sample <- kClustcentroids.diurnal1314.time.15 %>%
  as_tibble(rownames = "sample") %>%
  gather(key = cluster, value = value, -sample) %>%
  inner_join(sample.description.timecourse, by = "sample") %>%
  inner_join(cluster.diurnal1314.time.15.num, by = "cluster") %>%
  mutate(
    # facet label: cluster id followed, on a new line, by the cluster size
    cluster.n = glue::glue("{cluster2} \n({n2})", cluster2 = cluster, n2 = n)
  )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group: mean centroid value for every sampling_time x soil_trt x cluster
# combination, one row per combination (drawn as the group-mean line in the plot).
data.group <- data.sample %>%
  unite("sampling_time.soil.cluster", c("sampling_time", "soil_trt", "cluster"), remove = FALSE) %>%
  group_by(sampling_time.soil.cluster) %>%
  summarize(sampling_time.soil.cluster.mean = mean(value)) %>%
  inner_join(
    data.sample %>%
      unite("sampling_time.soil.cluster", c("sampling_time", "soil_trt", "cluster"), remove = FALSE) %>%
      dplyr::select("sampling_time.soil.cluster", "sampling_time", "sampling_day", "soil_trt", "cluster.n", "cluster"),
    by = "sampling_time.soil.cluster"
  ) %>%
  # Keep the first row of each combination (so sampling_day is whatever that
  # first row carries). This replaces a slice() over a hard-coded 1:1800 index,
  # which was only correct when the joined table had exactly 1800 rows.
  dplyr::distinct(sampling_time.soil.cluster, .keep_all = TRUE)
# plot: per-sample centroid values (jittered points) plus the group-mean line,
# faceted by cluster (rows) and sampling time (columns)
p15.diurnal1314.time <- ggplot(
  data.sample,
  aes(x = soil_trt, y = value, group = cluster, colour = as.factor(cluster), shape = sampling_day)
) +
  geom_jitter(alpha = 0.2) +
  geom_hline(yintercept = 0, color = "red") +
  geom_line(data = data.group, aes(x = soil_trt, y = sampling_time.soil.cluster.mean)) +
  facet_grid(cluster.n ~ sampling_time, scales = "free") +
  theme(axis.text = element_text(angle = 90), strip.text.y = element_text(angle = 0)) +
  labs(
    # the data are the day 13/14 samples; the title previously said "day 3 and 4"
    title = "K-means clustering of diurnal DEGs (day 13 and 14): fifteen clusters",
    color = "Cluster",
    y = "scaled expression level"
  )
p15.diurnal1314.time

# Arguments are named in full instead of relying on `file` partially matching
# `filename` and on the plot falling into the next free positional slot.
ggsave(
  filename = "../output/diurnal1314.time.DEG.Kmean.15clusters.png",
  plot = p15.diurnal1314.time,
  width = 11,
  height = 15
)

GO ORA under construction (day 3/4)

# 6 Kmeans cluster: nest the transcript IDs of each cluster and run the
# GO over-representation function once per cluster on that cluster's IDs
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal34.time.6
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      # pull() extracts the single transcript_ID column of the nested tibble
      function(tr) GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(tr))
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 960  GO:0009737            6.904754e-09                1.0000000         53
## 2239 GO:0042542            1.361915e-07                1.0000000         16
## 859  GO:0009414            1.422210e-07                1.0000000         40
## 921  GO:0009651            3.073264e-07                1.0000000         57
## 2795 GO:0051259            1.204131e-06                1.0000000          6
## 359  GO:0006355            2.052973e-06                0.9999990        120
## 854  GO:0009408            8.029617e-06                0.9999975         23
## 3749 GO:2000377            2.200039e-05                0.9999976          8
##      numInCat                                                    term ontology
## 960       832                               response to abscisic acid       BP
## 2239      130                           response to hydrogen peroxide       BP
## 859       596                           response to water deprivation       BP
## 921      1045                                 response to salt stress       BP
## 2795       22                         protein complex oligomerization       BP
## 359      2992              regulation of transcription, DNA-templated       BP
## 854       305                                        response to heat       BP
## 3749       41 regulation of reactive oxygen species metabolic process       BP
##      over_represented_padjust
## 960              2.616211e-05
## 2239             1.796251e-04
## 859              1.796251e-04
## 921              2.911150e-04
## 2795             9.124907e-04
## 359              1.296452e-03
## 854              4.346317e-03
## 3749             1.041993e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 359  GO:0006355            3.068091e-13                1.0000000        202
## 3270 GO:0080167            6.288627e-08                1.0000000         31
## 2448 GO:0045490            8.427229e-06                0.9999974         23
## 1737 GO:0019761            1.003628e-05                0.9999982         13
## 1052 GO:0009909            3.199823e-05                0.9999907         19
## 2688 GO:0048573            4.684771e-05                0.9999899         13
## 1047 GO:0009901            7.092009e-05                0.9999895          9
## 918  GO:0009648            7.106530e-05                0.9999945          6
## 3086 GO:0071249            7.771098e-05                0.9999903          8
## 2973 GO:0061408            1.143176e-04                0.9999817          9
## 898  GO:0009625            1.328304e-04                0.9999701         12
## 956  GO:0009733            1.618924e-04                0.9999190         42
## 2908 GO:0055073            1.956185e-04                0.9999973          3
## 3238 GO:0080112            2.231339e-04                0.9999978          3
## 3650 GO:1905039            2.231339e-04                0.9999978          3
## 3655 GO:1905200            2.231339e-04                0.9999978          3
## 1463 GO:0015770            2.392384e-04                0.9999696          7
## 1210 GO:0010200            2.414460e-04                0.9999023         25
## 909  GO:0009639            2.460730e-04                0.9999456         11
##      numInCat
## 359      2992
## 3270      254
## 2448      175
## 1737       69
## 1052      162
## 2688       83
## 1047       41
## 918        20
## 3086       30
## 2973       47
## 898        84
## 956       612
## 2908        5
## 3238        4
## 3650        4
## 3655        4
## 1463       30
## 1210      286
## 909        69
##                                                                                                 term
## 359                                                       regulation of transcription, DNA-templated
## 3270                                                                            response to karrikin
## 2448                                                                        pectin catabolic process
## 1737                                                              glucosinolate biosynthetic process
## 1052                                                                regulation of flower development
## 2688                                                                       photoperiodism, flowering
## 1047                                                                               anther dehiscence
## 918                                                                                   photoperiodism
## 3086                                                                    cellular response to nitrate
## 2973 positive regulation of transcription from RNA polymerase II promoter in response to heat stress
## 898                                                                               response to insect
## 956                                                                                response to auxin
## 2908                                                                         cadmium ion homeostasis
## 3238                                                                                     seed growth
## 3650                                                         carboxylic acid transmembrane transport
## 3655                                                        gibberellic acid transmembrane transport
## 1463                                                                               sucrose transport
## 1210                                                                              response to chitin
## 909                                                                 response to red or far red light
##      ontology over_represented_padjust
## 359        BP             1.162500e-09
## 3270       BP             1.191380e-04
## 2448       BP             9.506870e-03
## 1737       BP             9.506870e-03
## 1052       BP             2.424826e-02
## 2688       BP             2.958433e-02
## 1047       BP             3.271632e-02
## 918        BP             3.271632e-02
## 3086       BP             3.271632e-02
## 2973       BP             4.331495e-02
## 898        BP             4.575404e-02
## 956        BP             4.907214e-02
## 2908       BP             4.907214e-02
## 3238       BP             4.907214e-02
## 3650       BP             4.907214e-02
## 3655       BP             4.907214e-02
## 1463       BP             4.907214e-02
## 1210       BP             4.907214e-02
## 909        BP             4.907214e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 26   GO:0000103            3.687865e-06                0.9999998          7
## 1656 GO:0019252            1.225072e-05                0.9999983         10
## 211  GO:0005983            1.286419e-05                0.9999987          8
## 1098 GO:0010021            2.955039e-05                0.9999989          5
## 416  GO:0006446            4.381791e-05                0.9999968          6
## 1688 GO:0019419            5.249899e-05                0.9999987          4
## 32   GO:0000162            1.003052e-04                0.9999887          7
## 3412 GO:0098869            1.029735e-04                0.9999936          5
## 210  GO:0005982            1.148782e-04                0.9999871          7
##      numInCat                                   term ontology
## 26         24                   sulfate assimilation       BP
## 1656       54            starch biosynthetic process       BP
## 211        33               starch catabolic process       BP
## 1098       11       amylopectin biosynthetic process       BP
## 416        25 regulation of translational initiation       BP
## 1688        8                      sulfate reduction       BP
## 32         35        tryptophan biosynthetic process       BP
## 3412       20        cellular oxidant detoxification       BP
## 210        30               starch metabolic process       BP
##      over_represented_padjust
## 26                 0.01397332
## 1656               0.01624747
## 211                0.01624747
## 1098               0.02799161
## 416                0.03315311
## 1688               0.03315311
## 32                 0.04836372
## 3412               0.04836372
## 210                0.04836372
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 423  GO:0006468            1.044821e-07                1.0000000         53
## 1859 GO:0031349            2.094012e-06                0.9999999          5
## 713  GO:0007169            3.359777e-06                0.9999993         15
##      numInCat                                                             term
## 423      1484                                          protein phosphorylation
## 1859       13                          positive regulation of defense response
## 713       199 transmembrane receptor protein tyrosine kinase signaling pathway
##      ontology over_represented_padjust
## 423        BP             0.0003958828
## 1859       BP             0.0039671056
## 713        BP             0.0042433981
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 859  GO:0009414            9.142363e-18                1.0000000         55
## 854  GO:0009408            1.210338e-15                1.0000000         36
## 2913 GO:0055085            1.817530e-12                1.0000000         50
## 594  GO:0006833            6.148887e-12                1.0000000         12
## 921  GO:0009651            3.950814e-10                1.0000000         59
## 960  GO:0009737            3.147896e-08                1.0000000         47
## 2088 GO:0034605            1.229675e-07                1.0000000         15
## 952  GO:0009723            6.353837e-07                0.9999998         21
## 359  GO:0006355            9.461339e-07                0.9999996        109
## 2575 GO:0046686            9.557011e-07                0.9999996         41
## 914  GO:0009644            1.020333e-06                0.9999998         13
## 2747 GO:0050821            3.509374e-06                0.9999998          7
## 739  GO:0007623            1.005242e-05                0.9999976         16
## 1434 GO:0015670            1.115771e-05                1.0000000          3
## 1724 GO:0019676            1.376845e-05                0.9999996          5
## 973  GO:0009753            1.725343e-05                0.9999946         22
## 3164 GO:0071732            3.093638e-05                0.9999964          8
## 2106 GO:0034765            3.213091e-05                0.9999970          7
## 646  GO:0006979            3.245504e-05                0.9999878         27
## 419  GO:0006457            3.380041e-05                0.9999882         24
## 3114 GO:0071369            6.554065e-05                0.9999913          8
## 2196 GO:0042026            7.267651e-05                0.9999870         10
## 1210 GO:0010200            7.892983e-05                0.9999757         18
## 962  GO:0009739            1.333565e-04                0.9999645         14
## 3541 GO:1902289            1.361178e-04                0.9999946          4
## 649  GO:0006986            1.436639e-04                0.9999863          6
## 602  GO:0006855            1.456259e-04                0.9999637         13
## 1094 GO:0010017            1.756139e-04                0.9999774          7
## 2452 GO:0045595            1.887136e-04                0.9999969          3
## 2257 GO:0042754            1.988722e-04                0.9999972          3
## 2486 GO:0045892            3.140835e-04                0.9998844         20
## 2091 GO:0034620            3.217688e-04                0.9999635          6
## 2280 GO:0042853            3.548542e-04                0.9999921          3
## 642  GO:0006970            3.739193e-04                0.9998788         16
## 3450 GO:1900150            3.873183e-04                0.9999542          6
##      numInCat                                                 term ontology
## 859       596                        response to water deprivation       BP
## 854       305                                     response to heat       BP
## 2913      562                              transmembrane transport       BP
## 594        34                                      water transport       BP
## 921      1045                              response to salt stress       BP
## 960       832                            response to abscisic acid       BP
## 2088      117                            cellular response to heat       BP
## 952       255                                 response to ethylene       BP
## 359      2992           regulation of transcription, DNA-templated       BP
## 2575      753                              response to cadmium ion       BP
## 914       109                     response to high light intensity       BP
## 2747       28                                protein stabilization       BP
## 739       181                                     circadian rhythm       BP
## 1434        3                             carbon dioxide transport       BP
## 1724       10                           ammonia assimilation cycle       BP
## 973       338                            response to jasmonic acid       BP
## 3164       52                    cellular response to nitric oxide       BP
## 2106       33            regulation of ion transmembrane transport       BP
## 646       502                         response to oxidative stress       BP
## 419       423                                      protein folding       BP
## 3114       62               cellular response to ethylene stimulus       BP
## 2196       97                                    protein refolding       BP
## 1210      286                                   response to chitin       BP
## 962       208                              response to gibberellin       BP
## 3541       13 negative regulation of defense response to oomycetes       BP
## 649        31                         response to unfolded protein       BP
## 602       130                         drug transmembrane transport       BP
## 1094       52               red or far-red light signaling pathway       BP
## 2452        6                   regulation of cell differentiation       BP
## 2257        5              negative regulation of circadian rhythm       BP
## 2486      355  negative regulation of transcription, DNA-templated       BP
## 2091       38                cellular response to unfolded protein       BP
## 2280        7                          L-alanine catabolic process       BP
## 642       252                           response to osmotic stress       BP
## 3450       40             regulation of defense response to fungus       BP
##      over_represented_padjust
## 859              3.464042e-14
## 854              2.292986e-12
## 2913             2.295540e-09
## 594              5.824533e-09
## 921              2.993927e-07
## 960              1.987896e-05
## 2088             6.656056e-05
## 952              3.009336e-04
## 359              3.514583e-04
## 2575             3.514583e-04
## 914              3.514583e-04
## 2747             1.108085e-03
## 739              2.929894e-03
## 1434             3.019754e-03
## 1724             3.477909e-03
## 973              4.085828e-03
## 3164             6.403487e-03
## 2106             6.403487e-03
## 646              6.403487e-03
## 419              6.403487e-03
## 3114             1.182541e-02
## 2196             1.251688e-02
## 1210             1.300283e-02
## 962              2.043617e-02
## 3541             2.043617e-02
## 649              2.043617e-02
## 602              2.043617e-02
## 1094             2.376433e-02
## 2452             2.465641e-02
## 2257             2.511755e-02
## 2486             3.809943e-02
## 2091             3.809943e-02
## 2280             4.074371e-02
## 642              4.167001e-02
## 3450             4.192998e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 364  GO:0006364            1.422386e-13                1.0000000         24
## 2803 GO:0051301            7.287253e-07                0.9999998         28
## 880  GO:0009561            1.724029e-06                0.9999999          8
## 755  GO:0008295            3.857971e-06                0.9999998          6
## 688  GO:0007088            6.679476e-06                0.9999992          9
## 871  GO:0009451            1.679454e-05                0.9999947         22
## 2471 GO:0045787            2.013055e-05                0.9999974          9
## 663  GO:0007018            2.487619e-05                0.9999949         13
## 307  GO:0006260            2.571845e-05                0.9999955         11
## 495  GO:0006596            3.083674e-05                0.9999978          6
## 473  GO:0006557            6.627647e-05                0.9999978          4
## 102  GO:0000494            9.621584e-05                0.9999986          3
## 3681 GO:1990258            9.621584e-05                0.9999986          3
## 312  GO:0006268            1.011262e-04                0.9999912          6
## 1851 GO:0031167            1.074289e-04                0.9999960          4
## 1540 GO:0016075            1.155552e-04                0.9999955          4
## 314  GO:0006270            1.277248e-04                0.9999880          6
## 496  GO:0006597            1.391222e-04                0.9999943          4
## 146  GO:0001510            1.645776e-04                0.9999928          4
## 112  GO:0000724            1.800148e-04                0.9999680          9
## 69   GO:0000381            1.870324e-04                0.9999805          6
## 50   GO:0000278            1.890979e-04                0.9999543         12
## 22   GO:0000079            1.953523e-04                0.9999692          8
## 1651 GO:0019079            2.150943e-04                0.9999957          3
## 555  GO:0006744            2.516847e-04                0.9999801          5
## 836  GO:0009294            2.671875e-04                0.9999633          7
## 343  GO:0006325            2.853236e-04                0.9999391         10
## 3035 GO:0070828            3.147048e-04                0.9999922          3
## 704  GO:0007142            3.596521e-04                0.9999919          3
## 3503 GO:1901565            3.679428e-04                0.9999986          2
## 2860 GO:0051726            3.802784e-04                0.9999077         11
##      numInCat
## 364       160
## 2803      419
## 880        36
## 755        22
## 688        61
## 871       273
## 2471       68
## 663       108
## 307        96
## 495        29
## 473        13
## 102         7
## 3681        7
## 312        27
## 1851       13
## 1540       14
## 314        32
## 496        15
## 146        16
## 112        82
## 69         45
## 50        159
## 22         76
## 1651        8
## 555        27
## 836        51
## 343       120
## 3035       10
## 704         7
## 3503        3
## 2860      151
##                                                                         term
## 364                                                          rRNA processing
## 2803                                                           cell division
## 880                                                        megagametogenesis
## 755                                          spermidine biosynthetic process
## 688                                   regulation of mitotic nuclear division
## 871                                                         RNA modification
## 2471                                       positive regulation of cell cycle
## 663                                               microtubule-based movement
## 307                                                          DNA replication
## 495                                           polyamine biosynthetic process
## 473                            S-adenosylmethioninamine biosynthetic process
## 102                                         box C/D snoRNA 3'-end processing
## 3681                                           histone glutamine methylation
## 312                                DNA unwinding involved in DNA replication
## 1851                                                        rRNA methylation
## 1540                                                  rRNA catabolic process
## 314                                               DNA replication initiation
## 496                                            spermine biosynthetic process
## 146                                                          RNA methylation
## 112                  double-strand break repair via homologous recombination
## 69                  regulation of alternative mRNA splicing, via spliceosome
## 50                                                        mitotic cell cycle
## 22   regulation of cyclin-dependent protein serine/threonine kinase activity
## 1651                                                viral genome replication
## 555                                          ubiquinone biosynthetic process
## 836                                              DNA mediated transformation
## 343                                                   chromatin organization
## 3035                                            heterochromatin organization
## 704                                                          male meiosis II
## 3503                               organonitrogen compound catabolic process
## 2860                                                regulation of cell cycle
##      ontology over_represented_padjust
## 364        BP             5.389421e-10
## 2803       BP             1.380570e-03
## 880        BP             2.177449e-03
## 755        BP             3.654463e-03
## 688        BP             5.061707e-03
## 871        BP             1.060575e-02
## 2471       BP             1.082747e-02
## 663        BP             1.082747e-02
## 307        BP             1.082747e-02
## 495        BP             1.168404e-02
## 473        BP             2.282923e-02
## 102        BP             2.713654e-02
## 3681       BP             2.713654e-02
## 312        BP             2.713654e-02
## 1851       BP             2.713654e-02
## 1540       BP             2.736491e-02
## 314        BP             2.846761e-02
## 496        BP             2.928523e-02
## 146        BP             3.218216e-02
## 112        BP             3.218216e-02
## 69         BP             3.218216e-02
## 50         BP             3.218216e-02
## 22         BP             3.218216e-02
## 1651       BP             3.395802e-02
## 555        BP             3.814533e-02
## 836        BP             3.893744e-02
## 343        BP             4.004042e-02
## 3035       BP             4.258630e-02
## 704        BP             4.647118e-02
## 3503       BP             4.647118e-02
## 2860       BP             4.647983e-02
# Flatten the list-column `GO_result` of `temp` with unnest() and save the
# result as CSV.
# NOTE(review): `temp` is built before this point; presumably it holds one row
# per k-means cluster (6-cluster run, judging by the file name) -- confirm.
# The destination is passed positionally on purpose: readr 1.4.0 renamed the
# `path` argument of write_csv() to `file` and deprecated `path`, so naming
# neither keeps the call working, without a deprecation warning, on both older
# and newer readr. file.path() matches how the input files are located and
# yields the same "../output/..." string as before.
temp %>%
  unnest(GO_result) %>%
  write_csv(
    file.path("..", "output", "diurnal34.time.DEG.Kmeans.6cluster.csv")
  )
# K-means with 15 clusters: run the GO enrichment helper once per cluster.
# Steps: pair every transcript ID with its cluster label, collapse the
# transcripts of each cluster into a nested one-column tibble (`transcripts`),
# then call GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA() on each cluster's transcript
# IDs and keep whatever it returns in the list-column `GO_result`.
# NOTE(review): assumes kClusters.diurnal34.time.15 is in the same order as the
# rows of cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread -- confirm.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal34.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal34.time.15
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(cluster_transcripts) {
        # The nested tibble has a single column, transcript_ID; hand its
        # values to the enrichment helper as a plain vector.
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(
          cluster_transcripts[["transcript_ID"]]
        )
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 211 GO:0005983            1.572317e-08                1.0000000          8
## 739 GO:0007623            2.101937e-08                1.0000000         14
## 855 GO:0009409            3.356804e-05                0.9999895         20
##     numInCat                     term ontology over_represented_padjust
## 211       33 starch catabolic process       BP             0.0000398212
## 739      181         circadian rhythm       BP             0.0000398212
## 855      696         response to cold       BP             0.0423964320
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 744 GO:0008150            6.341344e-08                1.0000000         72
## 345 GO:0006334            9.184863e-06                0.9999996          5
##     numInCat                term ontology over_represented_padjust
## 744     6445  biological_process       BP             0.0002402735
## 345       44 nucleosome assembly       BP             0.0174007232
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 364  GO:0006364            2.764915e-14                1.0000000         17
## 69   GO:0000381            8.545837e-07                1.0000000          6
## 146  GO:0001510            3.621179e-06                0.9999999          4
## 102  GO:0000494            5.871156e-06                1.0000000          3
## 3681 GO:1990258            5.871156e-06                1.0000000          3
## 2437 GO:0045292            7.760361e-06                0.9999997          5
## 3035 GO:0070828            1.909049e-05                0.9999998          3
## 419  GO:0006457            2.666667e-05                0.9999943         13
## 2455 GO:0045604            2.798915e-05                0.9999998          3
## 2656 GO:0048444            3.055743e-05                0.9999991          4
## 855  GO:0009409            7.790258e-05                0.9999780         16
## 868  GO:0009446            8.884053e-05                0.9999985          3
## 2003 GO:0033388            8.884053e-05                0.9999985          3
## 1742 GO:0019856            9.872194e-05                0.9999984          3
## 1851 GO:0031167            1.226410e-04                0.9999977          3
## 3638 GO:1904812            1.691001e-04                1.0000000          2
## 1367 GO:0010499            1.854115e-04                0.9999906          4
##      numInCat                                                        term
## 364       160                                             rRNA processing
## 69         45    regulation of alternative mRNA splicing, via spliceosome
## 146        16                                             RNA methylation
## 102         7                            box C/D snoRNA 3'-end processing
## 3681        7                               histone glutamine methylation
## 2437       39                          mRNA cis splicing, via spliceosome
## 3035       10                                heterochromatin organization
## 419       423                                             protein folding
## 2455        6                regulation of epidermal cell differentiation
## 2656       19                                  floral organ morphogenesis
## 855       696                                            response to cold
## 868        11                             putrescine biosynthetic process
## 2003       11               putrescine biosynthetic process from arginine
## 1742        9                  pyrimidine nucleobase biosynthetic process
## 1851       13                                            rRNA methylation
## 3638        2         rRNA acetylation involved in maturation of SSU-rRNA
## 1367       49 proteasomal ubiquitin-independent protein catabolic process
##      ontology over_represented_padjust
## 364        BP             1.047626e-10
## 69         BP             1.619009e-03
## 146        BP             4.449162e-03
## 102        BP             4.449162e-03
## 3681       BP             4.449162e-03
## 2437       BP             4.900668e-03
## 3035       BP             1.033341e-02
## 419        BP             1.157821e-02
## 2455       BP             1.157821e-02
## 2656       BP             1.157821e-02
## 855        BP             2.589360e-02
## 868        BP             2.589360e-02
## 2003       BP             2.589360e-02
## 1742       BP             2.671839e-02
## 1851       BP             3.097912e-02
## 3638       BP             4.004501e-02
## 1367       BP             4.132496e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 69   GO:0000381            3.928140e-10                1.0000000          9
## 499  GO:0006606            2.366654e-09                1.0000000         10
## 2437 GO:0045292            6.300805e-08                1.0000000          7
## 762  GO:0008380            5.818397e-06                0.9999992         10
## 76   GO:0000398            1.103801e-05                0.9999984         10
## 398  GO:0006421            7.621456e-05                0.9999989          3
## 75   GO:0000395            8.351978e-05                0.9999987          3
##      numInCat                                                     term ontology
## 69         45 regulation of alternative mRNA splicing, via spliceosome       BP
## 499        67                              protein import into nucleus       BP
## 2437       39                       mRNA cis splicing, via spliceosome       BP
## 762       152                                             RNA splicing       BP
## 76        182                           mRNA splicing, via spliceosome       BP
## 398         8                          asparaginyl-tRNA aminoacylation       BP
## 75         10                          mRNA 5'-splice site recognition       BP
##      over_represented_padjust
## 69               1.488372e-06
## 499              4.483625e-06
## 2437             7.957916e-05
## 762              5.511477e-03
## 76               8.364602e-03
## 398              4.520806e-02
## 75               4.520806e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 854  GO:0009408            5.824553e-50                1.0000000         37
## 2088 GO:0034605            8.148688e-36                1.0000000         23
## 419  GO:0006457            1.959451e-33                1.0000000         30
## 2091 GO:0034620            1.447957e-21                1.0000000         12
## 649  GO:0006986            3.339549e-20                1.0000000         11
## 914  GO:0009644            7.547734e-20                1.0000000         14
## 2768 GO:0051085            2.403968e-19                1.0000000         13
## 2239 GO:0042542            7.162639e-19                1.0000000         14
## 2196 GO:0042026            1.077118e-16                1.0000000         12
## 2973 GO:0061408            1.067630e-12                1.0000000          8
## 892  GO:0009615            1.129479e-10                1.0000000          8
## 2795 GO:0051259            6.707294e-10                1.0000000          5
## 2775 GO:0051131            3.697003e-09                1.0000000          5
## 2747 GO:0050821            1.416684e-08                1.0000000          5
## 2575 GO:0046686            3.555674e-07                1.0000000         13
## 921  GO:0009651            3.837301e-07                0.9999999         15
## 58   GO:0000302            4.923887e-06                0.9999998          5
## 1712 GO:0019538            7.845640e-06                0.9999998          4
## 3092 GO:0071277            9.984307e-06                0.9999999          3
## 1210 GO:0010200            1.629073e-05                0.9999985          7
## 2967 GO:0061077            6.134692e-05                0.9999990          3
## 2327 GO:0043335            9.135742e-05                0.9999997          2
##      numInCat
## 854       305
## 2088      117
## 419       423
## 2091       38
## 649        31
## 914       109
## 2768       82
## 2239      130
## 2196       97
## 2973       47
## 892        82
## 2795       22
## 2775       20
## 2747       28
## 2575      753
## 921      1045
## 58         96
## 1712       39
## 3092       13
## 1210      286
## 2967       30
## 2327        5
##                                                                                                 term
## 854                                                                                 response to heat
## 2088                                                                       cellular response to heat
## 419                                                                                  protein folding
## 2091                                                           cellular response to unfolded protein
## 649                                                                     response to unfolded protein
## 914                                                                 response to high light intensity
## 2768                                                  chaperone cofactor-dependent protein refolding
## 2239                                                                   response to hydrogen peroxide
## 2196                                                                               protein refolding
## 2973 positive regulation of transcription from RNA polymerase II promoter in response to heat stress
## 892                                                                                response to virus
## 2795                                                                 protein complex oligomerization
## 2775                                                     chaperone-mediated protein complex assembly
## 2747                                                                           protein stabilization
## 2575                                                                         response to cadmium ion
## 921                                                                          response to salt stress
## 58                                                               response to reactive oxygen species
## 1712                                                                       protein metabolic process
## 3092                                                                cellular response to calcium ion
## 1210                                                                              response to chitin
## 2967                                                              chaperone-mediated protein folding
## 2327                                                                               protein unfolding
##      ontology over_represented_padjust
## 854        BP             2.206923e-46
## 2088       BP             1.543769e-32
## 419        BP             2.474787e-30
## 2091       BP             1.371577e-18
## 649        BP             2.530710e-17
## 914        BP             4.766394e-17
## 2768       BP             1.301234e-16
## 2239       BP             3.392405e-16
## 2196       BP             4.534666e-14
## 2973       BP             4.045248e-10
## 892        BP             3.890541e-08
## 2795       BP             2.117828e-07
## 2775       BP             1.077534e-06
## 2747       BP             3.834154e-06
## 2575       BP             8.981632e-05
## 921        BP             9.087208e-05
## 58         BP             1.097447e-03
## 1712       BP             1.651507e-03
## 3092       BP             1.991081e-03
## 1210       BP             3.086278e-03
## 2967       BP             1.106874e-02
## 2327       BP             1.573424e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 663  GO:0007018            5.402148e-16                1.0000000         21
## 636  GO:0006949            3.481836e-10                1.0000000          7
## 688  GO:0007088            5.058197e-09                1.0000000          9
## 2471 GO:0045787            1.795912e-08                1.0000000          9
## 22   GO:0000079            2.801338e-08                1.0000000          9
## 1380 GO:0010583            4.132701e-08                1.0000000          9
## 50   GO:0000278            6.934047e-08                1.0000000         12
## 753  GO:0008284            4.326029e-07                1.0000000          9
## 2860 GO:0051726            2.218223e-06                0.9999997         10
## 312  GO:0006268            3.189839e-06                0.9999999          6
## 1010 GO:0009828            1.100601e-05                0.9999993          6
## 130  GO:0000914            1.206645e-05                0.9999998          4
## 2803 GO:0051301            2.966280e-05                0.9999923         16
## 3277 GO:0080175            7.396195e-05                0.9999993          3
## 930  GO:0009664            1.564350e-04                0.9999837          6
##      numInCat
## 663       108
## 636        25
## 688        61
## 2471       68
## 22         76
## 1380       67
## 50        159
## 753       100
## 2860      151
## 312        27
## 1010       72
## 130        10
## 2803      419
## 3277        5
## 930        91
##                                                                         term
## 663                                               microtubule-based movement
## 636                                                      syncytium formation
## 688                                   regulation of mitotic nuclear division
## 2471                                       positive regulation of cell cycle
## 22   regulation of cyclin-dependent protein serine/threonine kinase activity
## 1380                                              response to cyclopentenone
## 50                                                        mitotic cell cycle
## 753                                positive regulation of cell proliferation
## 2860                                                regulation of cell cycle
## 312                                DNA unwinding involved in DNA replication
## 1010                                          plant-type cell wall loosening
## 130                                                    phragmoplast assembly
## 2803                                                           cell division
## 3277                                   phragmoplast microtubule organization
## 930                                        plant-type cell wall organization
##      ontology over_represented_padjust
## 663        BP             2.046874e-12
## 636        BP             6.596338e-07
## 688        BP             6.388502e-06
## 2471       BP             1.701178e-05
## 22         BP             2.122854e-05
## 1380       BP             2.609801e-05
## 50         BP             3.753301e-05
## 753        BP             2.048915e-04
## 2860       BP             9.338717e-04
## 312        BP             1.208630e-03
## 1010       BP             3.791072e-03
## 130        BP             3.809983e-03
## 2803       BP             8.645565e-03
## 3277       BP             2.001727e-02
## 930        BP             3.951549e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##       category over_represented_pvalue under_represented_pvalue numDEInCat
## 859 GO:0009414            5.472794e-07                0.9999998         25
## 359 GO:0006355            9.154185e-06                0.9999954         67
## 960 GO:0009737            9.520552e-06                0.9999966         28
## 962 GO:0009739            4.407560e-05                0.9999916         11
##     numInCat                                       term ontology
## 859      596              response to water deprivation       BP
## 359     2992 regulation of transcription, DNA-templated       BP
## 960      832                  response to abscisic acid       BP
## 962      208                    response to gibberellin       BP
##     over_represented_padjust
## 859              0.002073642
## 359              0.012024458
## 960              0.012024458
## 962              0.041750609
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 359  GO:0006355            1.658801e-07                1.0000000        119
## 2795 GO:0051259            3.503559e-07                1.0000000          7
## 3270 GO:0080167            2.089545e-06                0.9999994         21
## 914  GO:0009644            3.111287e-06                0.9999995         13
## 58   GO:0000302            2.089701e-05                0.9999964         11
## 859  GO:0009414            2.562366e-05                0.9999895         33
## 615  GO:0006883            3.714272e-05                0.9999991          4
## 1270 GO:0010286            3.797205e-05                0.9999931         11
## 3238 GO:0080112            5.769044e-05                0.9999996          3
## 3650 GO:1905039            5.769044e-05                0.9999996          3
## 3655 GO:1905200            5.769044e-05                0.9999996          3
## 918  GO:0009648            9.658606e-05                0.9999941          5
## 1463 GO:0015770            9.767588e-05                0.9999914          6
## 1919 GO:0032268            1.381421e-04                0.9999983          3
##      numInCat                                             term ontology
## 359      2992       regulation of transcription, DNA-templated       BP
## 2795       22                  protein complex oligomerization       BP
## 3270      254                             response to karrikin       BP
## 914       109                 response to high light intensity       BP
## 58         96              response to reactive oxygen species       BP
## 859       596                    response to water deprivation       BP
## 615         9                  cellular sodium ion homeostasis       BP
## 1270       99                                 heat acclimation       BP
## 3238        4                                      seed growth       BP
## 3650        4          carboxylic acid transmembrane transport       BP
## 3655        4         gibberellic acid transmembrane transport       BP
## 918        20                                   photoperiodism       BP
## 1463       30                                sucrose transport       BP
## 1919        5 regulation of cellular protein metabolic process       BP
##      over_represented_padjust
## 359              0.0006285196
## 2795             0.0006637492
## 3270             0.0026390947
## 914              0.0029471667
## 58               0.0158357545
## 859              0.0161813391
## 615              0.0179845132
## 1270             0.0179845132
## 3238             0.0198717329
## 3650             0.0198717329
## 3655             0.0198717329
## 918              0.0284687622
## 1463             0.0284687622
## 1919             0.0373871617
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 859  GO:0009414            1.257030e-11                1.0000000         27
## 594  GO:0006833            5.258354e-10                1.0000000          8
## 2913 GO:0055085            1.273853e-07                1.0000000         23
## 921  GO:0009651            3.712465e-05                0.9999869         24
##      numInCat                          term ontology over_represented_padjust
## 859       596 response to water deprivation       BP             4.762889e-08
## 594        34               water transport       BP             9.961951e-07
## 2913      562       transmembrane transport       BP             1.608876e-04
## 921      1045       response to salt stress       BP             3.516633e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2448 GO:0045490            5.987033e-07                0.9999999         13
##      numInCat                     term ontology over_represented_padjust
## 2448      175 pectin catabolic process       BP              0.002268487
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 26   GO:0000103            1.727813e-11                1.0000000          8
## 1342 GO:0010439            1.512440e-07                1.0000000          6
## 32   GO:0000162            8.258252e-07                1.0000000          6
## 1671 GO:0019344            1.563529e-06                0.9999999          6
## 3032 GO:0070814            3.377665e-06                0.9999999          4
## 2917 GO:0055114            8.330312e-06                0.9999966         37
## 1688 GO:0019419            4.109542e-05                0.9999995          3
## 2887 GO:0052542            5.331795e-05                0.9999993          3
## 2443 GO:0045454            5.478737e-05                0.9999914          9
## 2811 GO:0051336            9.850302e-05                0.9999998          2
## 2916 GO:0055091            9.889165e-05                1.0000000          2
## 3006 GO:0070328            9.889165e-05                1.0000000          2
##      numInCat                                             term ontology
## 26         24                             sulfate assimilation       BP
## 1342       28 regulation of glucosinolate biosynthetic process       BP
## 32         35                  tryptophan biosynthetic process       BP
## 1671       41                    cysteine biosynthetic process       BP
## 3032       13            hydrogen sulfide biosynthetic process       BP
## 2917     1923                      oxidation-reduction process       BP
## 1688        8                                sulfate reduction       BP
## 2887        9           defense response by callose deposition       BP
## 2443      253                           cell redox homeostasis       BP
## 2811        3                 regulation of hydrolase activity       BP
## 2916        2                         phospholipid homeostasis       BP
## 3006        2                         triglyceride homeostasis       BP
##      over_represented_padjust
## 26               6.546684e-08
## 1342             2.865317e-04
## 32               1.043017e-03
## 1671             1.481053e-03
## 3032             2.559595e-03
## 2917             5.260592e-03
## 1688             2.224437e-02
## 2887             2.306548e-02
## 2443             2.306548e-02
## 2811             3.122504e-02
## 2916             3.122504e-02
## 3006             3.122504e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 473  GO:0006557            4.108192e-06                0.9999999          4
## 782  GO:0009073            8.005306e-06                0.9999997          5
## 496  GO:0006597            8.693460e-06                0.9999998          4
## 1651 GO:0019079            2.660380e-05                0.9999997          3
## 862  GO:0009423            3.843819e-05                0.9999989          4
## 755  GO:0008295            5.324627e-05                0.9999982          4
## 2913 GO:0055085            1.019863e-04                0.9999667         19
## 1426 GO:0010966            1.173930e-04                0.9999998          2
## 359  GO:0006355            1.467187e-04                0.9999235         49
## 555  GO:0006744            1.552325e-04                0.9999928          4
## 1473 GO:0015800            1.602357e-04                1.0000000          2
## 3423 GO:0110126            1.602357e-04                1.0000000          2
## 3164 GO:0071732            1.698363e-04                0.9999869          5
## 3094 GO:0071281            1.830375e-04                0.9999804          6
##      numInCat                                            term ontology
## 473        13   S-adenosylmethioninamine biosynthetic process       BP
## 782        24 aromatic amino acid family biosynthetic process       BP
## 496        15                   spermine biosynthetic process       BP
## 1651        8                        viral genome replication       BP
## 862        16                 chorismate biosynthetic process       BP
## 755        22                 spermidine biosynthetic process       BP
## 2913      562                         transmembrane transport       BP
## 1426        3               regulation of phosphate transport       BP
## 359      2992      regulation of transcription, DNA-templated       BP
## 555        27                 ubiquinone biosynthetic process       BP
## 1473        2                     acidic amino acid transport       BP
## 3423        2                                  phloem loading       BP
## 3164       52               cellular response to nitric oxide       BP
## 3094       77                   cellular response to iron ion       BP
##      over_represented_padjust
## 473                0.01097984
## 782                0.01097984
## 496                0.01097984
## 1651               0.02520045
## 862                0.02912846
## 755                0.03362502
## 2913               0.04950074
## 1426               0.04950074
## 359                0.04950074
## 555                0.04950074
## 1473               0.04950074
## 3423               0.04950074
## 3164               0.04950074
## 3094               0.04953778
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 859  GO:0009414            2.207438e-07                0.9999999         23
## 960  GO:0009737            2.235441e-06                0.9999993         26
## 972  GO:0009751            4.782115e-06                0.9999990         15
## 952  GO:0009723            2.228710e-05                0.9999957         12
## 1224 GO:0010218            5.247198e-05                0.9999943          7
## 3078 GO:0071215            6.012530e-05                0.9999948          6
## 2106 GO:0034765            7.118921e-05                0.9999956          5
## 860  GO:0009415            8.989023e-05                0.9999984          3
##      numInCat                                        term ontology
## 859       596               response to water deprivation       BP
## 960       832                   response to abscisic acid       BP
## 972       347                  response to salicylic acid       BP
## 952       255                        response to ethylene       BP
## 1224       91                   response to far red light       BP
## 3078       62 cellular response to abscisic acid stimulus       BP
## 2106       33   regulation of ion transmembrane transport       BP
## 860        13                           response to water       BP
##      over_represented_padjust
## 859              0.0008363983
## 960              0.0042350420
## 972              0.0060398107
## 952              0.0211114602
## 1224             0.0379691243
## 3078             0.0379691243
## 2106             0.0385337044
## 860              0.0425742618
# Expand the GO_result list-column with unnest() and save the table as CSV.
# The output location is passed positionally rather than as `path=`:
# readr 1.4.0 deprecated the `path` argument name in favour of `file`, and a
# positional argument works with both the old and the new argument name.
temp %>%
  unnest(GO_result) %>%
  write_csv("../output/diurnal34.time.DEG.Kmeans.15cluster.csv")

GO ORA under construction (day 13/14)

# K-means with 6 clusters.
# Pair each transcript ID with its cluster label, collapse to one row per
# cluster (`transcripts` is a list-column of one-column tibbles), then call the
# GO ORA helper once per cluster on that cluster's transcript IDs.
# GO_result holds whatever the helper returns for each cluster.
temp <- tibble(
  transcript_ID =
    cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal1314.time.6
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(members) {
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(members$transcript_ID)
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1588 GO:0016567            1.471241e-10                1.0000000         69
## 634  GO:0006914            7.018011e-09                1.0000000         13
## 594  GO:0006833            2.275995e-08                1.0000000         10
## 359  GO:0006355            7.936683e-06                0.9999951        129
## 1587 GO:0016560            9.946419e-06                0.9999997          5
## 921  GO:0009651            1.614233e-05                0.9999919         57
## 744  GO:0008150            2.829628e-05                0.9999799        220
## 517  GO:0006635            2.903854e-05                0.9999949         11
## 916  GO:0009646            3.561472e-05                0.9999943         10
## 859  GO:0009414            7.037023e-05                0.9999683         36
## 1509 GO:0015914            7.482601e-05                0.9999960          5
## 770  GO:0008652            1.276318e-04                0.9999957          4
## 2745 GO:0050793            1.728096e-04                0.9999977          3
## 1183 GO:0010150            1.802934e-04                0.9999411         18
##      numInCat                                           term ontology
## 1588      988                         protein ubiquitination       BP
## 634        58                                      autophagy       BP
## 594        34                                water transport       BP
## 359      2992     regulation of transcription, DNA-templated       BP
## 1587       11 protein import into peroxisome matrix, docking       BP
## 921      1045                        response to salt stress       BP
## 744      6445                             biological_process       BP
## 517        76                      fatty acid beta-oxidation       BP
## 916        67                   response to absence of light       BP
## 859       596                  response to water deprivation       BP
## 1509       15                         phospholipid transport       BP
## 770         9       cellular amino acid biosynthetic process       BP
## 2745        5            regulation of developmental process       BP
## 1183      219                                leaf senescence       BP
##      over_represented_padjust
## 1588             5.574531e-07
## 634              1.329562e-05
## 594              2.874581e-05
## 359              7.518023e-03
## 1587             7.537396e-03
## 921              1.019388e-02
## 744              1.375338e-02
## 517              1.375338e-02
## 916              1.499380e-02
## 859              2.577416e-02
## 1509             2.577416e-02
## 770              4.029973e-02
## 2745             4.879512e-02
## 1183             4.879512e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 854  GO:0009408            8.109517e-14                1.0000000         52
## 419  GO:0006457            2.219551e-12                1.0000000         60
## 2437 GO:0045292            3.050226e-07                1.0000000         12
## 69   GO:0000381            3.274638e-07                1.0000000         13
## 211  GO:0005983            1.433359e-06                0.9999998         11
## 26   GO:0000103            2.793320e-06                0.9999998          9
## 76   GO:0000398            3.051956e-05                0.9999896         24
## 762  GO:0008380            6.536873e-05                0.9999780         22
## 2811 GO:0051336            7.685834e-05                1.0000000          3
## 2239 GO:0042542            1.023995e-04                0.9999687         18
## 32   GO:0000162            1.320460e-04                0.9999794          9
## 2768 GO:0051085            1.382397e-04                0.9999647         14
## 1628 GO:0018131            1.917710e-04                1.0000000          3
## 739  GO:0007623            1.928278e-04                0.9999276         23
## 2575 GO:0046686            1.975734e-04                0.9998852         65
## 855  GO:0009409            2.194944e-04                0.9998755         59
## 2088 GO:0034605            2.228418e-04                0.9999304         17
## 562  GO:0006760            2.364390e-04                0.9999976          3
##      numInCat                                                     term ontology
## 854       305                                         response to heat       BP
## 419       423                                          protein folding       BP
## 2437       39                       mRNA cis splicing, via spliceosome       BP
## 69         45 regulation of alternative mRNA splicing, via spliceosome       BP
## 211        33                                 starch catabolic process       BP
## 26         24                                     sulfate assimilation       BP
## 76        182                           mRNA splicing, via spliceosome       BP
## 762       152                                             RNA splicing       BP
## 2811        3                         regulation of hydrolase activity       BP
## 2239      130                            response to hydrogen peroxide       BP
## 32         35                          tryptophan biosynthetic process       BP
## 2768       82           chaperone cofactor-dependent protein refolding       BP
## 1628        3                 oxazole or thiazole biosynthetic process       BP
## 739       181                                         circadian rhythm       BP
## 2575      753                                  response to cadmium ion       BP
## 855       696                                         response to cold       BP
## 2088      117                                cellular response to heat       BP
## 562         4         folic acid-containing compound metabolic process       BP
##      over_represented_padjust
## 854              3.072696e-10
## 419              4.204939e-09
## 2437             3.101901e-04
## 69               3.101901e-04
## 211              1.086200e-03
## 26               1.763981e-03
## 76               1.651980e-02
## 762              3.096027e-02
## 2811             3.235736e-02
## 2239             3.879916e-02
## 32               4.364918e-02
## 2768             4.364918e-02
## 1628             4.966751e-02
## 739              4.966751e-02
## 2575             4.966751e-02
## 855              4.966751e-02
## 2088             4.966751e-02
## 562              4.977041e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 364  GO:0006364            2.393196e-34                1.0000000         50
## 990  GO:0009793            9.754393e-11                1.0000000         65
## 254  GO:0006096            1.566697e-08                1.0000000         21
## 2196 GO:0042026            2.872373e-08                1.0000000         17
## 86   GO:0000462            1.675822e-07                1.0000000         11
## 872  GO:0009553            1.909858e-07                1.0000000         22
## 1851 GO:0031167            1.125246e-06                1.0000000          6
## 419  GO:0006457            1.200376e-06                0.9999996         33
## 880  GO:0009561            5.279449e-06                0.9999995          9
## 2211 GO:0042254            1.210502e-05                0.9999971         16
## 855  GO:0009409            2.111775e-05                0.9999905         41
## 1139 GO:0010074            5.263969e-05                0.9999990          4
## 1416 GO:0010922            5.747121e-05                1.0000000          3
## 2220 GO:0042326            5.747121e-05                1.0000000          3
## 2214 GO:0042273            6.080392e-05                0.9999950          6
## 518  GO:0006636            7.199516e-05                0.9999905          8
## 391  GO:0006413            7.772936e-05                0.9999773         17
## 390  GO:0006412            1.156609e-04                0.9999520         28
## 688  GO:0007088            1.312060e-04                0.9999781          9
## 1746 GO:0019919            1.445912e-04                0.9999907          5
## 8    GO:0000027            1.473313e-04                0.9999778          8
## 752  GO:0008283            1.595805e-04                0.9999763          8
## 9    GO:0000028            1.669040e-04                0.9999785          7
## 836  GO:0009294            1.858305e-04                0.9999680          9
## 1646 GO:0018377            1.875561e-04                1.0000000          3
## 102  GO:0000494            2.623940e-04                0.9999947          3
## 3681 GO:1990258            2.623940e-04                0.9999947          3
## 1610 GO:0017126            2.643548e-04                1.0000000          2
## 87   GO:0000463            2.973289e-04                0.9999761          5
## 3032 GO:0070814            3.926556e-04                0.9999795          4
##      numInCat
## 364       160
## 990       767
## 254       138
## 2196       97
## 86         49
## 872       144
## 1851       13
## 419       423
## 880        36
## 2211      236
## 855       696
## 1139        6
## 1416        3
## 2220        3
## 2214       32
## 518        49
## 391       182
## 390       715
## 688        61
## 1746       17
## 8          72
## 752        51
## 9          61
## 836        51
## 1646        3
## 102         7
## 3681        7
## 1610        2
## 87         23
## 3032       13
##                                                                                          term
## 364                                                                           rRNA processing
## 990                                                embryo development ending in seed dormancy
## 254                                                                        glycolytic process
## 2196                                                                        protein refolding
## 86   maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 872                                                                    embryo sac development
## 1851                                                                         rRNA methylation
## 419                                                                           protein folding
## 880                                                                         megagametogenesis
## 2211                                                                      ribosome biogenesis
## 855                                                                          response to cold
## 1139                                                         maintenance of meristem identity
## 1416                                              positive regulation of phosphatase activity
## 2220                                                   negative regulation of phosphorylation
## 2214                                                       ribosomal large subunit biogenesis
## 518                                               unsaturated fatty acid biosynthetic process
## 391                                                                  translational initiation
## 390                                                                               translation
## 688                                                    regulation of mitotic nuclear division
## 1746                         peptidyl-arginine methylation, to asymmetrical-dimethyl arginine
## 8                                                            ribosomal large subunit assembly
## 752                                                                        cell proliferation
## 9                                                            ribosomal small subunit assembly
## 836                                                               DNA mediated transformation
## 1646                                                                   protein myristoylation
## 102                                                          box C/D snoRNA 3'-end processing
## 3681                                                            histone glutamine methylation
## 1610                                                                          nucleologenesis
## 87   maturation of LSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 3032                                                    hydrogen sulfide biosynthetic process
##      ontology over_represented_padjust
## 364        BP             9.067820e-31
## 990        BP             1.847970e-07
## 254        BP             1.978738e-05
## 2196       BP             2.720855e-05
## 86         BP             1.206075e-04
## 872        BP             1.206075e-04
## 1851       BP             5.685282e-04
## 419        BP             5.685282e-04
## 880        BP             2.222648e-03
## 2211       BP             4.586591e-03
## 855        BP             7.274105e-03
## 1139       BP             1.535907e-02
## 1416       BP             1.535907e-02
## 2220       BP             1.535907e-02
## 2214       BP             1.535907e-02
## 518        BP             1.704935e-02
## 391        BP             1.732450e-02
## 390        BP             2.434662e-02
## 688        BP             2.616524e-02
## 1746       BP             2.658278e-02
## 8          BP             2.658278e-02
## 752        BP             2.748412e-02
## 9          BP             2.749562e-02
## 836        BP             2.842600e-02
## 1646       BP             2.842600e-02
## 102        BP             3.577287e-02
## 3681       BP             3.577287e-02
## 1610       BP             3.577287e-02
## 87         BP             3.884756e-02
## 3032       BP             4.959241e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 620  GO:0006888            1.556063e-10                1.0000000         26
## 2917 GO:0055114            1.904872e-10                1.0000000        134
## 618  GO:0006886            2.498335e-09                1.0000000         42
## 1291 GO:0010345            1.341486e-08                1.0000000         13
## 960  GO:0009737            3.064785e-07                1.0000000         62
## 1564 GO:0016192            3.254301e-07                0.9999999         26
## 2634 GO:0048280            1.789761e-06                0.9999999          7
## 1933 GO:0032482            1.531185e-05                0.9999970         13
## 1008 GO:0009826            1.560475e-05                0.9999946         26
## 1523 GO:0015991            1.684641e-05                0.9999970         12
## 675  GO:0007035            2.095010e-05                0.9999983          7
## 278  GO:0006152            2.128646e-05                0.9999997          4
## 859  GO:0009414            2.477618e-05                0.9999885         44
## 2716 GO:0048767            3.373106e-05                0.9999924         14
## 660  GO:0007010            3.578244e-05                0.9999937         11
## 662  GO:0007017            5.510316e-05                0.9999907         10
## 2241 GO:0042546            6.865808e-05                0.9999844         13
## 2478 GO:0045839            8.048514e-05                0.9999950          5
## 41   GO:0000226            9.013085e-05                0.9999789         13
## 921  GO:0009651            9.382861e-05                0.9999469         65
## 2201 GO:0042147            1.111789e-04                0.9999845          8
## 995  GO:0009807            1.151905e-04                0.9999896          6
## 515  GO:0006631            1.720986e-04                0.9999601         12
## 3473 GO:1901001            1.754371e-04                0.9999833          6
## 2470 GO:0045786            1.967869e-04                0.9999850          5
## 1327 GO:0010411            2.258519e-04                0.9999501         11
## 517  GO:0006635            2.303113e-04                0.9999491         11
## 2114 GO:0034976            2.949048e-04                0.9999454          9
##      numInCat                                            term ontology
## 620       166          ER to Golgi vesicle-mediated transport       BP
## 2917     1923                     oxidation-reduction process       BP
## 618       428                 intracellular protein transport       BP
## 1291       41                    suberin biosynthetic process       BP
## 960       832                       response to abscisic acid       BP
## 1564      244                      vesicle-mediated transport       BP
## 2634       20             vesicle fusion with Golgi apparatus       BP
## 1933      109                 Rab protein signal transduction       BP
## 1008      256                      unidimensional cell growth       BP
## 1523       81         ATP hydrolysis coupled proton transport       BP
## 675        23                          vacuolar acidification       BP
## 278         6             purine nucleoside catabolic process       BP
## 859       596                   response to water deprivation       BP
## 2716       99                            root hair elongation       BP
## 660        67                       cytoskeleton organization       BP
## 662        65                       microtubule-based process       BP
## 2241       95                            cell wall biogenesis       BP
## 2478       27 negative regulation of mitotic nuclear division       BP
## 41         98           microtubule cytoskeleton organization       BP
## 921      1045                         response to salt stress       BP
## 2201       45         retrograde transport, endosome to Golgi       BP
## 995        28                     lignan biosynthetic process       BP
## 515        85                    fatty acid metabolic process       BP
## 3473       24  negative regulation of response to salt stress       BP
## 2470       30               negative regulation of cell cycle       BP
## 1327       77                    xyloglucan metabolic process       BP
## 517        76                       fatty acid beta-oxidation       BP
## 2114       55        response to endoplasmic reticulum stress       BP
##      over_represented_padjust
## 620              3.608781e-07
## 2917             3.608781e-07
## 618              3.155397e-06
## 1291             1.270723e-05
## 960              2.055091e-04
## 1564             2.055091e-04
## 2634             9.687718e-04
## 1933             6.383104e-03
## 1008             6.383104e-03
## 1523             6.383104e-03
## 675              6.721200e-03
## 278              6.721200e-03
## 859              7.221303e-03
## 2716             9.038644e-03
## 660              9.038644e-03
## 662              1.304912e-02
## 2241             1.530268e-02
## 2478             1.694212e-02
## 41               1.777583e-02
## 921              1.777583e-02
## 2201             1.983894e-02
## 995              1.983894e-02
## 515              2.769713e-02
## 3473             2.769713e-02
## 2470             2.982503e-02
## 1327             3.232036e-02
## 517              3.232036e-02
## 2114             3.990694e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 359  GO:0006355            3.484240e-16                1.0000000        191
## 957  GO:0009734            1.013408e-08                1.0000000         40
## 859  GO:0009414            2.487774e-07                1.0000000         51
## 862  GO:0009423            2.699999e-07                1.0000000          8
## 956  GO:0009733            3.197026e-07                0.9999999         47
## 782  GO:0009073            6.406436e-07                1.0000000          9
## 2486 GO:0045892            5.542665e-06                0.9999979         33
## 2712 GO:0048756            1.071621e-05                0.9999999          4
## 739  GO:0007623            1.173405e-05                0.9999966         21
## 960  GO:0009737            1.315675e-05                0.9999935         59
## 1380 GO:0010583            2.770717e-05                0.9999948         12
## 2575 GO:0046686            3.146692e-05                0.9999840         54
## 2586 GO:0046827            4.819892e-05                1.0000000          3
## 153  GO:0001666            6.228472e-05                0.9999861         13
## 1278 GO:0010315            8.760525e-05                0.9999871          9
## 2578 GO:0046719            1.186981e-04                0.9999991          3
## 472  GO:0006556            1.295855e-04                0.9999960          4
## 646  GO:0006979            1.321049e-04                0.9999392         35
## 2646 GO:0048364            1.426994e-04                0.9999364         33
## 3270 GO:0080167            1.467287e-04                0.9999466         22
## 992  GO:0009800            1.501745e-04                0.9999922          5
## 946  GO:0009699            1.798635e-04                0.9999741          8
## 1737 GO:0019761            2.377272e-04                0.9999519         10
## 908  GO:0009638            2.477560e-04                0.9999574          9
## 921  GO:0009651            2.871091e-04                0.9998322         62
## 1183 GO:0010150            2.934480e-04                0.9998909         21
## 2689 GO:0048574            2.946741e-04                0.9999612          7
##      numInCat                                                     term ontology
## 359      2992               regulation of transcription, DNA-templated       BP
## 957       377                        auxin-activated signaling pathway       BP
## 859       596                            response to water deprivation       BP
## 862        16                          chorismate biosynthetic process       BP
## 956       612                                        response to auxin       BP
## 782        24          aromatic amino acid family biosynthetic process       BP
## 2486      355      negative regulation of transcription, DNA-templated       BP
## 2712        5                               sieve cell differentiation       BP
## 739       181                                         circadian rhythm       BP
## 960       832                                response to abscisic acid       BP
## 1380       67                               response to cyclopentenone       BP
## 2575      753                                  response to cadmium ion       BP
## 2586        3       positive regulation of protein export from nucleus       BP
## 153        83                                      response to hypoxia       BP
## 1278       34                                             auxin efflux       BP
## 2578        4 regulation by virus of viral protein levels in host cell       BP
## 472         8                S-adenosylmethionine biosynthetic process       BP
## 646       502                             response to oxidative stress       BP
## 2646      326                                         root development       BP
## 3270      254                                     response to karrikin       BP
## 992        10                       cinnamic acid biosynthetic process       BP
## 946        34                     phenylpropanoid biosynthetic process       BP
## 1737       69                       glucosinolate biosynthetic process       BP
## 908        37                                             phototropism       BP
## 921      1045                                  response to salt stress       BP
## 1183      219                                          leaf senescence       BP
## 2689       30                       long-day photoperiodism, flowering       BP
##      over_represented_padjust
## 359              1.320178e-12
## 957              1.919902e-05
## 859              2.422706e-04
## 862              2.422706e-04
## 956              2.422706e-04
## 782              4.045664e-04
## 2486             3.000165e-03
## 2712             4.940037e-03
## 739              4.940037e-03
## 960              4.985094e-03
## 1380             9.543861e-03
## 2575             9.935681e-03
## 2586             1.404813e-02
## 153              1.685692e-02
## 1278             2.212909e-02
## 2578             2.709577e-02
## 472              2.709577e-02
## 646              2.709577e-02
## 2646             2.709577e-02
## 3270             2.709577e-02
## 992              2.709577e-02
## 946              3.097739e-02
## 1737             3.911448e-02
## 908              3.911448e-02
## 921              4.135259e-02
## 1183             4.135259e-02
## 2689             4.135259e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 359  GO:0006355            3.468321e-11                1.0000000        327
## 1210 GO:0010200            3.675982e-09                1.0000000         51
## 2795 GO:0051259            7.459472e-08                1.0000000         10
## 914  GO:0009644            1.306012e-07                1.0000000         26
## 2239 GO:0042542            1.748421e-07                1.0000000         28
## 1270 GO:0010286            4.168907e-07                0.9999999         24
## 1183 GO:0010150            9.219324e-07                0.9999997         40
## 646  GO:0006979            2.174960e-06                0.9999991         68
## 58   GO:0000302            2.502004e-06                0.9999994         21
## 2917 GO:0055114            7.200448e-06                0.9999952        208
## 744  GO:0008150            1.202850e-05                0.9999905        539
## 910  GO:0009640            7.333506e-05                0.9999737         25
## 3270 GO:0080167            1.128228e-04                0.9999484         37
## 1404 GO:0010729            1.168180e-04                0.9999983          4
## 3164 GO:0071732            1.652231e-04                0.9999614         13
##      numInCat                                                          term
## 359      2992                    regulation of transcription, DNA-templated
## 1210      286                                            response to chitin
## 2795       22                               protein complex oligomerization
## 914       109                              response to high light intensity
## 2239      130                                 response to hydrogen peroxide
## 1270       99                                              heat acclimation
## 1183      219                                               leaf senescence
## 646       502                                  response to oxidative stress
## 58         96                           response to reactive oxygen species
## 2917     1923                                   oxidation-reduction process
## 744      6445                                            biological_process
## 910       129                                            photomorphogenesis
## 3270      254                                          response to karrikin
## 1404        5 positive regulation of hydrogen peroxide biosynthetic process
## 3164       52                             cellular response to nitric oxide
##      ontology over_represented_padjust
## 359        BP             1.314147e-07
## 1210       BP             6.964148e-06
## 2795       BP             9.421314e-05
## 914        BP             1.237120e-04
## 2239       BP             1.324953e-04
## 1270       BP             2.632665e-04
## 1183       BP             4.990288e-04
## 646        BP             1.030116e-03
## 58         BP             1.053344e-03
## 2917       BP             2.728250e-03
## 744        BP             4.143271e-03
## 910        BP             2.315555e-02
## 3270       BP             3.161595e-02
## 1404       BP             3.161595e-02
## 3164       BP             4.173537e-02
# Flatten the list-column `GO_result` of `temp` with unnest() and save the
# result as CSV. `temp` is defined earlier in the script (presumably the
# 5-cluster k-means table, judging by the output file name -- confirm).
# The output path is passed positionally because readr renamed this argument
# from `path` to `file` in 1.4.0 (`path` is now deprecated); a positional call
# works with either version. file.path() yields the same "../output/..." string
# and matches how the input files are located at the top of the script.
temp %>%
  unnest(GO_result) %>%
  write_csv(
    file.path("..", "output", "diurnal1314.time.DEG.Kmeans.5cluster.csv")
  )
# 15 Kmeans cluster
# Pair every transcript with its cluster label from the 15-cluster k-means
# run, collapse the transcripts of each cluster into a one-column list-column
# (`transcripts`), and run GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA() once per
# cluster on that cluster's transcript IDs. The per-cluster results are stored
# in the list-column `GO_result`.
# NOTE(review): group_by() is never undone here, so `temp` likely stays grouped
# by `cluster` -- confirm before applying group-sensitive verbs downstream.
temp <- tibble(
  transcript_ID = cpm.timecourse.v3.0.scale.diurnal1314.time.DEG.spread$transcript_ID,
  cluster = kClusters.diurnal1314.time.15
) %>%
  group_by(cluster) %>%
  nest(transcripts = transcript_ID) %>%
  mutate(
    GO_result = map(
      transcripts,
      function(cluster.transcripts) {
        # pull() with no column argument extracts the last (here: only) column
        # of the nested tibble, i.e. the transcript IDs as a plain vector.
        GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(pull(cluster.transcripts))
      }
    )
  )
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1588 GO:0016567            3.849482e-10                1.0000000         45
## 634  GO:0006914            1.903491e-07                1.0000000          9
## 916  GO:0009646            1.217681e-05                0.9999987          8
## 470  GO:0006552            2.752624e-05                0.9999994          4
## 1432 GO:0015031            2.871240e-05                0.9999915         19
## 517  GO:0006635            4.185058e-05                0.9999947          8
## 1509 GO:0015914            9.164722e-05                0.9999966          4
## 13   GO:0000045            9.901920e-05                0.9999908          6
##      numInCat                         term ontology over_represented_padjust
## 1588      988       protein ubiquitination       BP             1.458569e-06
## 634        58                    autophagy       BP             3.606163e-04
## 916        67 response to absence of light       BP             1.537931e-02
## 470        10    leucine catabolic process       BP             2.175826e-02
## 1432      423            protein transport       BP             2.175826e-02
## 517        76    fatty acid beta-oxidation       BP             2.642864e-02
## 1509       15       phospholipid transport       BP             4.689797e-02
## 13         45       autophagosome assembly       BP             4.689797e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 744  GO:0008150            1.397610e-09                        1        100
## 2614 GO:0048096            2.330488e-05                        1          2
##      numInCat                                            term ontology
## 744      6445                              biological_process       BP
## 2614        2 chromatin-mediated maintenance of transcription       BP
##      over_represented_padjust
## 744              5.295543e-06
## 2614             4.415110e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 26   GO:0000103            2.273003e-08                1.0000000          8
## 3032 GO:0070814            1.252714e-07                1.0000000          6
## 1002 GO:0009817            1.682569e-07                1.0000000         13
## 2917 GO:0055114            6.622543e-07                0.9999998         77
## 1166 GO:0010112            6.403056e-06                0.9999997          6
## 1708 GO:0019509            2.483389e-05                0.9999994          4
## 899  GO:0009626            4.362989e-05                0.9999904         13
## 2913 GO:0055085            5.613172e-05                0.9999760         33
## 1694 GO:0019438            5.818906e-05                0.9999953          6
## 1783 GO:0030187            8.338058e-05                0.9999971          4
##      numInCat                                                 term ontology
## 26         24                                 sulfate assimilation       BP
## 3032       13                hydrogen sulfide biosynthetic process       BP
## 1002      100 defense response to fungus, incompatible interaction       BP
## 2917     1923                          oxidation-reduction process       BP
## 1166       22           regulation of systemic acquired resistance       BP
## 1708       10        L-methionine salvage from methylthioadenosine       BP
## 899       140                   plant-type hypersensitive response       BP
## 2913      562                              transmembrane transport       BP
## 1694       33               aromatic compound biosynthetic process       BP
## 1783       13                       melatonin biosynthetic process       BP
##      over_represented_padjust
## 26               0.0000861241
## 3032             0.0002125084
## 1002             0.0002125084
## 2917             0.0006273204
## 1166             0.0048522357
## 1708             0.0156826022
## 899              0.0236162341
## 2913             0.0244975939
## 1694             0.0244975939
## 1783             0.0315929024
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 960  GO:0009737            9.489755e-15                1.0000000         36
## 859  GO:0009414            3.108692e-14                1.0000000         30
## 860  GO:0009415            6.971840e-09                1.0000000          5
## 2913 GO:0055085            1.199155e-08                1.0000000         24
## 921  GO:0009651            3.522399e-07                0.9999999         28
## 904  GO:0009631            1.357395e-06                0.9999999          8
## 961  GO:0009738            3.664216e-06                0.9999992         16
## 642  GO:0006970            3.783913e-06                0.9999994         12
## 517  GO:0006635            8.401934e-06                0.9999993          7
## 834  GO:0009269            1.161893e-05                0.9999995          5
## 3403 GO:0098712            3.421071e-05                0.9999997          3
## 3031 GO:0070813            6.638609e-05                1.0000000          2
##      numInCat                                      term ontology
## 960       832                 response to abscisic acid       BP
## 859       596             response to water deprivation       BP
## 860        13                         response to water       BP
## 2913      562                   transmembrane transport       BP
## 921      1045                   response to salt stress       BP
## 904       104                          cold acclimation       BP
## 961       437 abscisic acid-activated signaling pathway       BP
## 642       252                response to osmotic stress       BP
## 517        76                 fatty acid beta-oxidation       BP
## 834        37                   response to desiccation       BP
## 3403        7 L-glutamate import across plasma membrane       BP
## 3031        2        hydrogen sulfide metabolic process       BP
##      over_represented_padjust
## 960              3.595668e-11
## 859              5.889416e-11
## 860              8.805434e-06
## 2913             1.135899e-05
## 921              2.669274e-04
## 904              8.571946e-04
## 961              1.792156e-03
## 642              1.792156e-03
## 517              3.537214e-03
## 834              4.402412e-03
## 3403             1.178403e-02
## 3031             2.096141e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2437 GO:0045292            8.623797e-09                1.0000000         10
## 419  GO:0006457            1.109566e-08                1.0000000         31
## 69   GO:0000381            4.779232e-08                1.0000000         10
## 854  GO:0009408            6.337182e-07                0.9999998         23
## 211  GO:0005983            1.146102e-05                0.9999991          7
## 499  GO:0006606            3.151611e-05                0.9999956          9
## 1860 GO:0031365            3.196444e-05                0.9999993          4
## 2575 GO:0046686            4.006781e-05                0.9999824         36
## 75   GO:0000395            4.119843e-05                0.9999989          4
## 76   GO:0000398            6.765334e-05                0.9999832         14
## 762  GO:0008380            8.408582e-05                0.9999801         13
##      numInCat                                                     term ontology
## 2437       39                       mRNA cis splicing, via spliceosome       BP
## 419       423                                          protein folding       BP
## 69         45 regulation of alternative mRNA splicing, via spliceosome       BP
## 854       305                                         response to heat       BP
## 211        33                                 starch catabolic process       BP
## 499        67                              protein import into nucleus       BP
## 1860        9               N-terminal protein amino acid modification       BP
## 2575      753                                  response to cadmium ion       BP
## 75         10                          mRNA 5'-splice site recognition       BP
## 76        182                           mRNA splicing, via spliceosome       BP
## 762       152                                             RNA splicing       BP
##      over_represented_padjust
## 2437             2.102073e-05
## 419              2.102073e-05
## 69               6.036170e-05
## 854              6.002896e-04
## 211              8.685159e-03
## 499              1.730189e-02
## 1860             1.730189e-02
## 2575             1.734454e-02
## 75               1.734454e-02
## 76               2.563385e-02
## 762              2.896374e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 662  GO:0007017            3.354846e-08                1.0000000         11
## 41   GO:0000226            1.098832e-07                1.0000000         13
## 620  GO:0006888            1.670693e-07                1.0000000         16
## 618  GO:0006886            4.872304e-06                0.9999985         24
## 2478 GO:0045839            2.238550e-05                0.9999989          5
## 239  GO:0006048            2.927616e-05                0.9999993          4
## 50   GO:0000278            3.234985e-05                0.9999930         13
## 2470 GO:0045786            4.572597e-05                0.9999975          5
##      numInCat                                            term ontology
## 662        65                       microtubule-based process       BP
## 41         98           microtubule cytoskeleton organization       BP
## 620       166          ER to Golgi vesicle-mediated transport       BP
## 618       428                 intracellular protein transport       BP
## 2478       27 negative regulation of mitotic nuclear division       BP
## 239         9    UDP-N-acetylglucosamine biosynthetic process       BP
## 50        159                              mitotic cell cycle       BP
## 2470       30               negative regulation of cell cycle       BP
##      over_represented_padjust
## 662              0.0001271151
## 41               0.0002081737
## 620              0.0002110086
## 618              0.0046152903
## 2478             0.0169637347
## 239              0.0175105132
## 50               0.0175105132
## 2470             0.0216569634
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 2917 GO:0055114            5.631648e-09                1.0000000         65
## 1126 GO:0010054            7.040200e-08                1.0000000          5
## 997  GO:0009809            3.198873e-06                0.9999996         11
## 676  GO:0007043            3.522516e-06                0.9999999          4
## 3200 GO:0072732            7.884618e-06                1.0000000          3
## 994  GO:0009805            1.800113e-05                0.9999996          4
## 2716 GO:0048767            3.284954e-05                0.9999953          9
## 957  GO:0009734            4.335307e-05                0.9999878         17
## 1663 GO:0019287            6.369490e-05                0.9999979          4
## 1125 GO:0010053            8.531885e-05                0.9999924          6
## 1008 GO:0009826            9.928023e-05                0.9999742         14
## 3110 GO:0071365            1.395094e-04                0.9999900          5
##      numInCat                                                             term
## 2917     1923                                      oxidation-reduction process
## 1126        8                                      trichoblast differentiation
## 997       116                                      lignin biosynthetic process
## 676        12                                      cell-cell junction assembly
## 3200        4                      cellular response to calcium ion starvation
## 994        11                                    coumarin biosynthetic process
## 2716       99                                             root hair elongation
## 957       377                                auxin-activated signaling pathway
## 1663       13 isopentenyl diphosphate biosynthetic process, mevalonate pathway
## 1125       42                              root epidermal cell differentiation
## 1008      256                                       unidimensional cell growth
## 3110       36                              cellular response to auxin stimulus
##      ontology over_represented_padjust
## 2917       BP             2.133832e-05
## 1126       BP             1.333766e-04
## 997        BP             3.336703e-03
## 676        BP             3.336703e-03
## 3200       BP             5.974964e-03
## 994        BP             1.136772e-02
## 2716       BP             1.778099e-02
## 957        BP             2.053310e-02
## 1663       BP             2.681555e-02
## 1125       BP             3.232731e-02
## 1008       BP             3.419753e-02
## 3110       BP             4.405009e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 364  GO:0006364            4.792368e-47                1.0000000         49
## 990  GO:0009793            4.674821e-14                1.0000000         49
## 419  GO:0006457            1.414484e-10                1.0000000         28
## 86   GO:0000462            1.757274e-10                1.0000000         11
## 2196 GO:0042026            2.344270e-10                1.0000000         15
## 2211 GO:0042254            1.795168e-08                1.0000000         14
## 872  GO:0009553            4.048503e-08                1.0000000         17
## 390  GO:0006412            9.616713e-08                1.0000000         22
## 8    GO:0000027            4.425042e-07                1.0000000          8
## 2214 GO:0042273            6.806677e-07                1.0000000          6
## 1851 GO:0031167            9.426005e-07                1.0000000          5
## 378  GO:0006396            2.453011e-06                0.9999997         11
## 1746 GO:0019919            4.730782e-06                0.9999999          5
## 880  GO:0009561            6.296014e-06                0.9999995          7
## 391  GO:0006413            1.059482e-05                0.9999979         13
## 87   GO:0000463            1.134493e-05                0.9999995          5
## 9    GO:0000028            1.505072e-05                0.9999990          6
## 102  GO:0000494            2.136054e-05                0.9999998          3
## 3681 GO:1990258            2.136054e-05                0.9999998          3
## 2109 GO:0034969            2.237774e-05                0.9999990          5
## 1610 GO:0017126            4.358185e-05                1.0000000          2
## 146  GO:0001510            5.474898e-05                0.9999982          4
## 289  GO:0006189            6.481222e-05                0.9999963          5
## 1144 GO:0010080            6.551240e-05                0.9999994          3
## 3059 GO:0071028            7.511706e-05                0.9999989          3
## 1192 GO:0010162            7.827587e-05                0.9999952          5
## 2768 GO:0051085            8.666422e-05                0.9999878          8
## 988  GO:0009790            1.180523e-04                0.9999729         12
## 2075 GO:0034475            1.211075e-04                0.9999978          3
## 658  GO:0007005            1.283551e-04                0.9999810          8
## 82   GO:0000454            1.509351e-04                0.9999996          2
## 752  GO:0008283            1.612840e-04                0.9999835          6
## 2575 GO:0046686            1.739260e-04                0.9999277         27
## 2416 GO:0045037            2.497229e-04                0.9999729          6
## 596  GO:0006839            2.547212e-04                0.9999712          6
## 3261 GO:0080156            2.780841e-04                0.9999685          6
## 1583 GO:0016554            2.950233e-04                0.9999749          5
## 1139 GO:0010074            3.049828e-04                0.9999941          3
## 2067 GO:0034427            3.106374e-04                0.9999921          3
## 2455 GO:0045604            3.423867e-04                0.9999932          3
## 455  GO:0006527            3.610280e-04                0.9999919          3
## 77   GO:0000413            5.254751e-04                0.9999477          5
## 3062 GO:0071035            5.482813e-04                0.9999841          3
##      numInCat
## 364       160
## 990       767
## 419       423
## 86         49
## 2196       97
## 2211      236
## 872       144
## 390       715
## 8          72
## 2214       32
## 1851       13
## 378       103
## 1746       17
## 880        36
## 391       182
## 87         23
## 9          61
## 102         7
## 3681        7
## 2109       20
## 1610        2
## 146        16
## 289        21
## 1144        5
## 3059        8
## 1192       28
## 2768       82
## 988       176
## 2075       10
## 658        78
## 82          3
## 752        51
## 2575      753
## 2416       39
## 596        65
## 3261       52
## 1583       39
## 1139        6
## 2067       11
## 2455        6
## 455         7
## 77         59
## 3062        9
##                                                                                          term
## 364                                                                           rRNA processing
## 990                                                embryo development ending in seed dormancy
## 419                                                                           protein folding
## 86   maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 2196                                                                        protein refolding
## 2211                                                                      ribosome biogenesis
## 872                                                                    embryo sac development
## 390                                                                               translation
## 8                                                            ribosomal large subunit assembly
## 2214                                                       ribosomal large subunit biogenesis
## 1851                                                                         rRNA methylation
## 378                                                                            RNA processing
## 1746                         peptidyl-arginine methylation, to asymmetrical-dimethyl arginine
## 880                                                                         megagametogenesis
## 391                                                                  translational initiation
## 87   maturation of LSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)
## 9                                                            ribosomal small subunit assembly
## 102                                                          box C/D snoRNA 3'-end processing
## 3681                                                            histone glutamine methylation
## 2109                                                             histone arginine methylation
## 1610                                                                          nucleologenesis
## 146                                                                           RNA methylation
## 289                                                        'de novo' IMP biosynthetic process
## 1144                                                     regulation of floral meristem growth
## 3059                                                                nuclear mRNA surveillance
## 1192                                                                    seed dormancy process
## 2768                                           chaperone cofactor-dependent protein refolding
## 988                                                                        embryo development
## 2075                                                               U4 snRNA 3'-end processing
## 658                                                                mitochondrion organization
## 82                                                 snoRNA guided rRNA pseudouridine synthesis
## 752                                                                        cell proliferation
## 2575                                                                  response to cadmium ion
## 2416                                                   protein import into chloroplast stroma
## 596                                                                   mitochondrial transport
## 3261                                                          mitochondrial mRNA modification
## 1583                                                              cytidine to uridine editing
## 1139                                                         maintenance of meristem identity
## 2067                        nuclear-transcribed mRNA catabolic process, exonucleolytic, 3'-5'
## 2455                                             regulation of epidermal cell differentiation
## 455                                                                arginine catabolic process
## 77                                                      protein peptidyl-prolyl isomerization
## 3062                                 nuclear polyadenylation-dependent rRNA catabolic process
##      ontology over_represented_padjust
## 364        BP             1.815828e-43
## 990        BP             8.856448e-11
## 419        BP             1.664578e-07
## 86         BP             1.664578e-07
## 2196       BP             1.776488e-07
## 2211       BP             1.133649e-05
## 872        BP             2.191397e-05
## 390        BP             4.554716e-05
## 8          BP             1.862943e-04
## 2214       BP             2.579050e-04
## 1851       BP             3.246830e-04
## 378        BP             7.745382e-04
## 1746       BP             1.378841e-03
## 880        BP             1.703971e-03
## 391        BP             2.676251e-03
## 87         BP             2.686622e-03
## 9          BP             3.354540e-03
## 102        BP             4.239462e-03
## 3681       BP             4.239462e-03
## 2109       BP             4.239462e-03
## 1610       BP             7.863411e-03
## 146        BP             9.429268e-03
## 289        BP             1.034277e-02
## 1144       BP             1.034277e-02
## 3059       BP             1.138474e-02
## 1192       BP             1.140720e-02
## 2768       BP             1.216188e-02
## 988        BP             1.582332e-02
## 2075       BP             1.582332e-02
## 658        BP             1.621124e-02
## 82         BP             1.844816e-02
## 752        BP             1.909703e-02
## 2575       BP             1.996986e-02
## 2416       BP             2.757539e-02
## 596        BP             2.757539e-02
## 3261       BP             2.926835e-02
## 1583       BP             3.017962e-02
## 1139       BP             3.017962e-02
## 2067       BP             3.017962e-02
## 2455       BP             3.243258e-02
## 455        BP             3.336427e-02
## 77         BP             4.740536e-02
## 3062       BP             4.831251e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 359  GO:0006355            3.774479e-15                1.0000000         91
## 1737 GO:0019761            3.841331e-09                1.0000000         11
## 957  GO:0009734            3.980710e-08                1.0000000         21
## 992  GO:0009800            7.273723e-07                1.0000000          5
## 475  GO:0006559            3.410267e-06                0.9999998          6
## 956  GO:0009733            5.299121e-06                0.9999984         22
## 2486 GO:0045892            8.208963e-06                0.9999980         17
## 946  GO:0009699            2.078197e-05                0.9999986          6
## 3086 GO:0071249            2.698712e-05                0.9999981          6
## 793  GO:0009098            5.645881e-05                0.9999967          5
## 740  GO:0007639            1.448675e-04                0.9999976          3
##      numInCat                                                term ontology
## 359      2992          regulation of transcription, DNA-templated       BP
## 1737       69                  glucosinolate biosynthetic process       BP
## 957       377                   auxin-activated signaling pathway       BP
## 992        10                  cinnamic acid biosynthetic process       BP
## 475        27                   L-phenylalanine catabolic process       BP
## 956       612                                   response to auxin       BP
## 2486      355 negative regulation of transcription, DNA-templated       BP
## 946        34                phenylpropanoid biosynthetic process       BP
## 3086       30                        cellular response to nitrate       BP
## 793        30                        leucine biosynthetic process       BP
## 740         7             homeostasis of number of meristem cells       BP
##      over_represented_padjust
## 359              1.430150e-11
## 1737             7.277402e-06
## 957              5.027637e-05
## 992              6.890034e-04
## 475              2.584300e-03
## 956              3.346395e-03
## 2486             4.443394e-03
## 946              9.842862e-03
## 3086             1.136158e-02
## 793              2.139224e-02
## 740              4.990025e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 859  GO:0009414            8.959736e-12                1.0000000         32
## 594  GO:0006833            1.887099e-10                1.0000000          9
## 359  GO:0006355            3.154466e-09                1.0000000         75
## 739  GO:0007623            1.097169e-06                0.9999998         13
## 1434 GO:0015670            1.786928e-06                1.0000000          3
## 2198 GO:0042128            2.593663e-06                0.9999997          9
## 1195 GO:0010167            3.616853e-06                0.9999997          7
## 2913 GO:0055085            5.429333e-06                0.9999983         24
## 897  GO:0009624            3.801918e-05                0.9999929         11
## 921  GO:0009651            3.971408e-05                0.9999841         30
## 1023 GO:0009853            9.846845e-05                0.9999881          7
##      numInCat                                       term ontology
## 859       596              response to water deprivation       BP
## 594        34                            water transport       BP
## 359      2992 regulation of transcription, DNA-templated       BP
## 739       181                           circadian rhythm       BP
## 1434        3                   carbon dioxide transport       BP
## 2198       81                       nitrate assimilation       BP
## 1195       53                        response to nitrate       BP
## 2913      562                    transmembrane transport       BP
## 897       176                       response to nematode       BP
## 921      1045                    response to salt stress       BP
## 1023      104                           photorespiration       BP
##      over_represented_padjust
## 859              3.394844e-08
## 594              3.575108e-07
## 359              3.984091e-06
## 739              1.039293e-03
## 1434             1.354134e-03
## 2198             1.637898e-03
## 1195             1.957751e-03
## 2913             2.571468e-03
## 897              1.504767e-02
## 921              1.504767e-02
## 1023             3.391790e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 914  GO:0009644            1.723508e-13                1.0000000         28
## 2239 GO:0042542            1.376343e-11                1.0000000         28
## 2795 GO:0051259            1.634665e-11                1.0000000         12
## 854  GO:0009408            1.130203e-09                1.0000000         42
## 58   GO:0000302            1.703843e-08                1.0000000         20
## 1270 GO:0010286            5.509471e-08                1.0000000         20
## 359  GO:0006355            1.641735e-07                1.0000000        205
## 744  GO:0008150            5.601514e-07                0.9999998        381
## 2973 GO:0061408            2.022432e-06                0.9999997         12
## 2465 GO:0045736            1.633302e-05                0.9999992          6
## 2088 GO:0034605            1.867923e-05                0.9999951         18
## 678  GO:0007050            3.200089e-05                0.9999981          6
## 2917 GO:0055114            1.058852e-04                0.9999283        133
## 1445 GO:0015706            1.509860e-04                0.9999755          9
##      numInCat
## 914       109
## 2239      130
## 2795       22
## 854       305
## 58         96
## 1270       99
## 359      2992
## 744      6445
## 2973       47
## 2465       14
## 2088      117
## 678        15
## 2917     1923
## 1445       39
##                                                                                                 term
## 914                                                                 response to high light intensity
## 2239                                                                   response to hydrogen peroxide
## 2795                                                                 protein complex oligomerization
## 854                                                                                 response to heat
## 58                                                               response to reactive oxygen species
## 1270                                                                                heat acclimation
## 359                                                       regulation of transcription, DNA-templated
## 744                                                                               biological_process
## 2973 positive regulation of transcription from RNA polymerase II promoter in response to heat stress
## 2465                negative regulation of cyclin-dependent protein serine/threonine kinase activity
## 2088                                                                       cellular response to heat
## 678                                                                                cell cycle arrest
## 2917                                                                     oxidation-reduction process
## 1445                                                                               nitrate transport
##      ontology over_represented_padjust
## 914        BP             6.530372e-10
## 2239       BP             2.064582e-08
## 2795       BP             2.064582e-08
## 854        BP             1.070585e-06
## 58         BP             1.291172e-05
## 1270       BP             3.479231e-05
## 359        BP             8.886477e-05
## 744        BP             2.653017e-04
## 2973       BP             8.514440e-04
## 2465       BP             6.188581e-03
## 2088       BP             6.434146e-03
## 678        BP             1.010428e-02
## 2917       BP             3.086146e-02
## 1445       BP             4.086327e-02
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 862  GO:0009423            1.024617e-07                1.0000000          7
## 1210 GO:0010200            2.693419e-07                0.9999999         21
## 782  GO:0009073            1.634045e-06                0.9999999          7
## 254  GO:0006096            5.125682e-06                0.9999990         14
## 153  GO:0001666            2.146923e-05                0.9999967         10
## 1183 GO:0010150            3.261610e-05                0.9999915         16
## 155  GO:0001678            8.967007e-05                0.9999971          4
##      numInCat                                            term ontology
## 862        16                 chorismate biosynthetic process       BP
## 1210      286                              response to chitin       BP
## 782        24 aromatic amino acid family biosynthetic process       BP
## 254       138                              glycolytic process       BP
## 153        83                             response to hypoxia       BP
## 1183      219                                 leaf senescence       BP
## 155        10                    cellular glucose homeostasis       BP
##      over_represented_padjust
## 862              0.0003882275
## 1210             0.0005102682
## 782              0.0020637991
## 254              0.0048553023
## 153              0.0162693858
## 1183             0.0205970672
## 155              0.0485371255
## Warning in pcls(G): initial point very close to some inequality constraints

## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns

## [1] "enriched.GO is"
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 663  GO:0007018            5.321387e-14                1.0000000         19
## 688  GO:0007088            1.879943e-09                1.0000000         11
## 1380 GO:0010583            4.325007e-09                1.0000000         11
## 130  GO:0000914            4.412960e-09                1.0000000          6
## 2471 GO:0045787            7.551536e-09                1.0000000         11
## 22   GO:0000079            1.457876e-08                1.0000000         11
## 704  GO:0007142            1.807141e-08                1.0000000          5
## 753  GO:0008284            2.393789e-08                1.0000000         12
## 2860 GO:0051726            3.459615e-07                1.0000000         13
## 50   GO:0000278            6.681427e-07                0.9999999         13
## 343  GO:0006325            1.121667e-06                0.9999999         11
## 2803 GO:0051301            1.864926e-06                0.9999995         21
## 128  GO:0000911            3.477490e-05                0.9999982          5
## 1712 GO:0019538            4.825314e-05                0.9999961          6
## 3277 GO:0080175            5.382854e-05                0.9999995          3
## 1709 GO:0019510            5.840923e-05                0.9999995          3
## 739  GO:0007623            8.636543e-05                0.9999823         11
## 3035 GO:0070828            1.242185e-04                0.9999978          3
## 1996 GO:0033353            1.998934e-04                0.9999963          3
## 636  GO:0006949            2.372055e-04                0.9999878          4
##      numInCat
## 663       108
## 688        61
## 1380       67
## 130        10
## 2471       68
## 22         76
## 704         7
## 753       100
## 2860      151
## 50        159
## 343       120
## 2803      419
## 128        26
## 1712       39
## 3277        5
## 1709        5
## 739       181
## 3035       10
## 1996        7
## 636        25
##                                                                         term
## 663                                               microtubule-based movement
## 688                                   regulation of mitotic nuclear division
## 1380                                              response to cyclopentenone
## 130                                                    phragmoplast assembly
## 2471                                       positive regulation of cell cycle
## 22   regulation of cyclin-dependent protein serine/threonine kinase activity
## 704                                                          male meiosis II
## 753                                positive regulation of cell proliferation
## 2860                                                regulation of cell cycle
## 50                                                        mitotic cell cycle
## 343                                                   chromatin organization
## 2803                                                           cell division
## 128                                      cytokinesis by cell plate formation
## 1712                                               protein metabolic process
## 3277                                   phragmoplast microtubule organization
## 1709                                S-adenosylhomocysteine catabolic process
## 739                                                         circadian rhythm
## 3035                                            heterochromatin organization
## 1996                                              S-adenosylmethionine cycle
## 636                                                      syncytium formation
##      ontology over_represented_padjust
## 663        BP             2.016273e-10
## 688        BP             3.561553e-06
## 1380       BP             4.180177e-06
## 130        BP             4.180177e-06
## 2471       BP             5.722554e-06
## 22         BP             9.206487e-06
## 704        BP             9.781795e-06
## 753        BP             1.133758e-05
## 2860       BP             1.456498e-04
## 50         BP             2.531593e-04
## 343        BP             3.863632e-04
## 2803       BP             5.888503e-04
## 128        BP             1.013555e-02
## 1712       BP             1.305937e-02
## 3277       BP             1.359709e-02
## 1709       BP             1.383204e-02
## 739        BP             1.924933e-02
## 3035       BP             2.614799e-02
## 1996       BP             3.986295e-02
## 636        BP             4.493858e-02
# Expand the nested `GO_result` list-column of `temp` with unnest() and write
# the flattened table to ../output as a CSV file.
# NOTE(review): `temp` is built earlier in the file; presumably it holds one
# GO-enrichment result per k-means cluster -- confirm against its definition.
#
# The destination is passed positionally on purpose: readr 1.4.0 renamed the
# second argument of write_csv() from `path` to `file` (and deprecated `path`),
# so an unnamed argument works with both older and newer readr versions.
# file.path() yields the same "../output/..." string as before and matches how
# the input files are located elsewhere in this analysis.
temp %>%
  unnest(GO_result) %>%
  write_csv(
    file.path("..", "output", "diurnal1314.time.DEG.Kmeans.15cluster.csv")
  )